]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[version] update
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Parse the query component of `url` into a dict mapping each key to a list of values."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
de7f3446 60class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
61 """Provide base functions for Youtube extractors"""
# Google Accounts endpoints. _LOGIN_URL, _LOOKUP_URL, _CHALLENGE_URL and
# _TFA_URL are requested by the username/password flow in _login();
# _TWOFACTOR_URL is not referenced in the visible part of this file.
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
# '{0}' is filled with the "TL" value extracted from the challenge response
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# Regex alternation of path names; presumably the first path segments that
# must not be mistaken for a channel/user name - confirm against the users
_RESERVED_NAMES = (
    r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|'
    r'movies|results|shared|hashtag|trending|feed|feeds|oembed|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

# Playlist ID: one of the listed prefixes followed by at least 10 ID
# characters, or one of the fixed IDs RDMM, WL, LL, LM
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 79
def _login(self):
    """
    Attempt to log in to YouTube.
    True is returned if successful or skipped.
    False is returned if login failed.
    None is returned when a username was supplied (username+password login
    is broken, only a warning is emitted), when the login page could not be
    downloaded, or when the password challenge request failed.

    If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
    """

    def warn(message):
        self.report_warning(message)

    # username+password login is broken
    if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
        self.raise_login_required(
            'Login details are needed to download this content', method='cookies')
    username, password = self._get_login_info()
    if username:
        warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
        return
    # Everything below this is broken!

    # No authentication to be performed
    if username is None:
        if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
        # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
        #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
        return True

    login_page = self._download_webpage(
        self._LOGIN_URL, None,
        note='Downloading login page',
        errnote='unable to fetch login page', fatal=False)
    if login_page is False:
        return

    login_form = self._hidden_inputs(login_page)

    def req(url, f_req, note, errnote):
        """POST the hidden login form fields plus `f_req` (JSON-encoded) to `url`; non-fatal."""
        data = login_form.copy()
        data.update({
            'pstMsg': 1,
            'checkConnection': 'youtube',
            'checkedDomains': 'youtube',
            'hl': 'en',
            'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
            'f.req': json.dumps(f_req),
            'flowName': 'GlifWebSignIn',
            'flowEntry': 'ServiceLogin',
            # TODO: reverse actual botguard identifier generation algo
            'bgRequest': '["identifier",""]',
        })
        return self._download_json(
            url, None, note=note, errnote=errnote,
            # Drop everything preceding the first '[' before parsing the JSON
            transform_source=lambda s: re.sub(r'^[^[]*', '', s),
            fatal=False,
            data=urlencode_postdata(data), headers={
                'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                'Google-Accounts-XSRF': 1,
            })

    lookup_req = [
        username,
        None, [], None, 'US', None, None, 2, False, True,
        [
            None, None,
            [2, 1, None, 1,
             'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
             None, [], 4],
            1, [None, None, []], None, None, None, True
        ],
        username,
    ]

    lookup_results = req(
        self._LOOKUP_URL, lookup_req,
        'Looking up account info', 'Unable to look up account info')

    if lookup_results is False:
        return False

    user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
    if not user_hash:
        warn('Unable to extract user hash')
        return False

    challenge_req = [
        user_hash,
        None, 1, None, [1, None, None, None, [password, None, True]],
        [
            None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
            1, [None, None, []], None, None, None, True
        ]]

    challenge_results = req(
        self._CHALLENGE_URL, challenge_req,
        'Logging in', 'Unable to log in')

    if challenge_results is False:
        return

    login_res = try_get(challenge_results, lambda x: x[0][5], list)
    if login_res:
        login_msg = try_get(login_res, lambda x: x[5], compat_str)
        # The conditional must be parenthesized: '%' binds tighter than
        # 'if/else', so without the parentheses the prefix was dropped for
        # every message other than INCORRECT_ANSWER_ENTERED
        warn('Unable to login: %s' % (
            'Invalid password' if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
        return False

    res = try_get(challenge_results, lambda x: x[0][-1], list)
    if not res:
        warn('Unable to extract result entry')
        return False

    login_challenge = try_get(res, lambda x: x[0][0], list)
    if login_challenge:
        challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
        if challenge_str == 'TWO_STEP_VERIFICATION':
            # SEND_SUCCESS - TFA code has been successfully sent to phone
            # QUOTA_EXCEEDED - reached the limit of TFA codes
            status = try_get(login_challenge, lambda x: x[5], compat_str)
            if status == 'QUOTA_EXCEEDED':
                warn('Exceeded the limit of TFA codes, try later')
                return False

            tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
            if not tl:
                warn('Unable to extract TL')
                return False

            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                # Trailing space added so the two literals do not run together
                warn(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            tfa_code = remove_start(tfa_code, 'G-')

            tfa_req = [
                user_hash, None, 2, None,
                [
                    9, None, None, None, None, None, None, None,
                    [None, tfa_code, True, 2]
                ]]

            tfa_results = req(
                self._TFA_URL.format(tl), tfa_req,
                'Submitting TFA code', 'Unable to submit TFA code')

            if tfa_results is False:
                return False

            tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
            if tfa_res:
                tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                # Parenthesized for the same precedence reason as above
                warn('Unable to finish TFA: %s' % (
                    'Invalid TFA code' if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                return False

            check_cookie_url = try_get(
                tfa_results, lambda x: x[0][-1][2], compat_str)
        else:
            CHALLENGES = {
                'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
            }
            challenge = CHALLENGES.get(
                challenge_str,
                '%s returned error %s.' % (self.IE_NAME, challenge_str))
            warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
            return False
    else:
        check_cookie_url = try_get(res, lambda x: x[2], compat_str)

    if not check_cookie_url:
        warn('Unable to extract CheckCookie URL')
        return False

    check_cookie_results = self._download_webpage(
        check_cookie_url, None, 'Checking cookie', fatal=False)

    if check_cookie_results is False:
        return False

    # The login counts as successful only if the CheckCookie page mentions
    # the Google account page
    if 'https://myaccount.google.com/' not in check_cookie_results:
        warn('Unable to log in')
        return False

    return True
274
cce889b9 275 def _initialize_consent(self):
276 cookies = self._get_cookies('https://www.youtube.com/')
277 if cookies.get('__Secure-3PSID'):
278 return
279 consent_id = None
280 consent = cookies.get('CONSENT')
281 if consent:
282 if 'YES' in consent.value:
283 return
284 consent_id = self._search_regex(
285 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
286 if not consent_id:
287 consent_id = random.randint(100, 999)
288 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 289
b2e8bc1b 290 def _real_initialize(self):
cce889b9 291 self._initialize_consent()
b2e8bc1b
JMF
292 if self._downloader is None:
293 return
b2e8bc1b
JMF
294 if not self._login():
295 return
c5e8d7af 296
# Fallback client version, used when ytcfg has no INNERTUBE_CLIENT_VERSION
_YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
# Fallback API key, used when ytcfg has no INNERTUBE_API_KEY
_YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
# Captures the JSON object assigned to ytInitialData (plain or window[...] form)
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
# Captures the JSON object assigned to ytInitialPlayerResponse
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text expected right after the JSON object; appended to the patterns above
# to build a stricter first-choice regex (see _extract_yt_initial_data)
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 302
a5c56234
M
303 def _generate_sapisidhash_header(self):
304 sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
305 if sapisid_cookie is None:
306 return
307 time_now = round(time.time())
308 sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
309 return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)
310
311 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 312 note='Downloading API JSON', errnote='Unable to download API page',
313 context=None, api_key=None):
314
315 data = {'context': context} if context else {'context': self._extract_context()}
8bdd16b4 316 data.update(query)
f4f751af 317 real_headers = self._generate_api_headers()
318 real_headers.update({'content-type': 'application/json'})
319 if headers:
320 real_headers.update(headers)
545cc85d 321 return self._download_json(
a5c56234
M
322 'https://www.youtube.com/youtubei/v1/%s' % ep,
323 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 324 data=json.dumps(data).encode('utf8'), headers=real_headers,
325 query={'key': api_key or self._extract_api_key()})
326
def _extract_api_key(self, ytcfg=None):
    """Return the string INNERTUBE_API_KEY from `ytcfg`, or the built-in default key."""
    configured_key = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_API_KEY'], compat_str)
    if configured_key:
        return configured_key
    return self._YT_INNERTUBE_API_KEY
c54f4aad 329
8bdd16b4 330 def _extract_yt_initial_data(self, video_id, webpage):
331 return self._parse_json(
332 self._search_regex(
29f7c58a 333 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 334 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 335 video_id)
0c148415 336
a1c5d2ca
M
def _extract_identity_token(self, webpage, item_id):
    """
    Return the ID_TOKEN for the page, or None if it cannot be found.

    The parsed ytcfg is consulted first; the raw webpage is searched with
    a regex as a fallback.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
346
@staticmethod
def _extract_account_syncid(data):
    """
    Extract syncId required to download private playlists of secondary channels
    @param data Either response or ytcfg
    @returns    The channel syncid taken from the datasyncId, else the value
                of DELEGATED_SESSION_ID when `data` is a dict, else None
    """
    sync_ids = (try_get(
        data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
               lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
    if len(sync_ids) >= 2 and sync_ids[1]:
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return sync_ids[0]
    # The lookup above tolerates None/non-dict input; do the same here
    # instead of raising AttributeError on data.get
    if not isinstance(data, dict):
        return None
    # ytcfg includes channel_syncid if on secondary channel
    return data.get('DELEGATED_SESSION_ID')
a1c5d2ca 362
29f7c58a 363 def _extract_ytcfg(self, video_id, webpage):
8c54a305 364 if not webpage:
365 return {}
29f7c58a 366 return self._parse_json(
367 self._search_regex(
368 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 369 default='{}'), video_id, fatal=False) or {}
370
def __extract_client_version(self, ytcfg):
    """Return the string INNERTUBE_CLIENT_VERSION from `ytcfg`, or the built-in default version."""
    configured_version = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'], compat_str)
    if configured_version:
        return configured_version
    return self._YT_WEB_CLIENT_VERSION
373
def _extract_context(self, ytcfg=None):
    """
    Return the Innertube request context.

    A non-empty INNERTUBE_CONTEXT dict in `ytcfg` is returned as is.
    Otherwise a minimal context is built from the client name (default
    'WEB'), the client version and, when present, VISITOR_DATA.
    """
    existing = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
    if existing:
        return existing

    # Recreate the client context (required)
    version = self.__extract_client_version(ytcfg)
    name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
    client = {
        'clientName': name,
        'clientVersion': version,
    }
    visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
    if visitor_data:
        client['visitorData'] = visitor_data
    return {'client': client}
392
393 def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
394 headers = {
395 'X-YouTube-Client-Name': '1',
396 'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
397 }
398 if identity_token:
399 headers['x-youtube-identity-token'] = identity_token
400 if account_syncid:
401 headers['X-Goog-PageId'] = account_syncid
402 headers['X-Goog-AuthUser'] = 0
403 if visitor_data:
404 headers['x-goog-visitor-id'] = visitor_data
405 auth = self._generate_sapisidhash_header()
406 if auth is not None:
407 headers['Authorization'] = auth
408 headers['X-Origin'] = 'https://www.youtube.com'
409 return headers
29f7c58a 410
@staticmethod
def is_music_url(url):
    """Tell whether `url` starts with http(s)://music.youtube.com/."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))
414
def _extract_video(self, renderer):
    """
    Convert a video renderer dict into a 'url' result handled by YoutubeIE.

    Title, description, duration, view count and uploader are read from
    the renderer where available; missing values come out as None.
    """
    title_getters = (
        lambda x: x['title']['runs'][0]['text'],
        lambda x: x['title']['simpleText'])
    uploader_getters = (
        lambda x: x['ownerText']['runs'][0]['text'],
        lambda x: x['shortBylineText']['runs'][0]['text'])

    video_id = renderer.get('videoId')
    title = try_get(renderer, title_getters, compat_str)
    description = try_get(
        renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
        compat_str)

    length_text = try_get(
        renderer, lambda x: x['lengthText']['simpleText'], compat_str)
    duration = parse_duration(length_text)

    views_text = try_get(
        renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
    # Only the leading digit/comma group counts, after removing all whitespace
    views_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', views_text),
        'view count', default=None)
    view_count = str_to_int(views_digits)

    uploader = try_get(renderer, uploader_getters, compat_str)

    info = {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
    return info
446
0c148415 447
360e1ca5 448class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 449 IE_DESC = 'YouTube.com'
bc2ca1bb 450 _INVIDIOUS_SITES = (
451 # invidious-redirect websites
452 r'(?:www\.)?redirect\.invidious\.io',
453 r'(?:(?:www|dev)\.)?invidio\.us',
454 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
455 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 456 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 457 r'(?:(?:www|au)\.)?ytprivate\.com',
458 r'(?:www\.)?invidious\.namazso\.eu',
459 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 460 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
461 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
462 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
463 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
464 # youtube-dl invidious instances list
465 r'(?:(?:www|no)\.)?invidiou\.sh',
466 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
467 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 468 r'(?:www\.)?invidious\.mastodon\.host',
469 r'(?:www\.)?invidious\.zapashcanon\.fr',
470 r'(?:www\.)?invidious\.kavin\.rocks',
201c1459 471 r'(?:www\.)?invidious\.tinfoil-hat\.net',
472 r'(?:www\.)?invidious\.himiko\.cloud',
473 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 474 r'(?:www\.)?invidious\.tube',
475 r'(?:www\.)?invidiou\.site',
476 r'(?:www\.)?invidious\.site',
477 r'(?:www\.)?invidious\.xyz',
478 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 479 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 480 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 481 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 482 r'(?:www\.)?tube\.poal\.co',
483 r'(?:www\.)?tube\.connect\.cafe',
484 r'(?:www\.)?vid\.wxzm\.sx',
485 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 486 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 487 r'(?:www\.)?yewtu\.be',
488 r'(?:www\.)?yt\.elukerio\.org',
489 r'(?:www\.)?yt\.lelux\.fi',
490 r'(?:www\.)?invidious\.ggc-project\.de',
491 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 492 r'(?:www\.)?ytprivate\.com',
493 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 494 r'(?:www\.)?invidious\.toot\.koeln',
495 r'(?:www\.)?invidious\.fdn\.fr',
496 r'(?:www\.)?watch\.nettohikari\.com',
497 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
498 r'(?:www\.)?qklhadlycap4cnod\.onion',
499 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
500 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
501 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
502 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
503 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
504 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
505 )
cb7dfeea 506 _VALID_URL = r"""(?x)^
c5e8d7af 507 (
edb53e2d 508 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 509 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
510 (?:www\.)?deturl\.com/www\.youtube\.com|
511 (?:www\.)?pwnyoutube\.com|
512 (?:www\.)?hooktube\.com|
513 (?:www\.)?yourepeat\.com|
514 tube\.majestyc\.net|
515 %(invidious)s|
516 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
517 (?:.*?\#/)? # handle anchor (#/) redirect urls
518 (?: # the various things that can precede the ID:
ac7553d0 519 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 520 |(?: # or the v= param in all its forms
f7000f3a 521 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 522 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 523 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
524 v=
525 )
f4b05232 526 ))
cbaed4bb
S
527 |(?:
528 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
529 vid\.plus| # or vid.plus/xxxx
530 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 531 %(invidious)s
cbaed4bb 532 )/
edb53e2d 533 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 534 )
c5e8d7af 535 )? # all until now is optional -> you can pass the naked ID
201c1459 536 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 537 (?(1).+)? # if we found the ID, everything can follow
9297939e 538 (?:\#|$)""" % {
bc2ca1bb 539 'invidious': '|'.join(_INVIDIOUS_SITES),
540 }
e40c758c 541 _PLAYER_INFO_RE = (
cc2db878 542 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
543 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 544 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 545 )
2c62dc26 546 _formats = {
c2d3cb4c 547 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
548 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
549 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
550 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
551 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
552 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
553 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
554 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 555 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 556 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
557 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
558 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
559 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
560 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
561 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 562 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 563 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
564 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 565
566
567 # 3D videos
c2d3cb4c 568 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
569 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
570 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
571 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 572 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
573 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
574 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 575
96fb5605 576 # Apple HTTP Live Streaming
11f12195 577 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 578 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
579 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
580 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
581 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
582 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 583 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
584 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
585
586 # DASH mp4 video
d23028a8
S
587 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
588 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
589 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
590 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
591 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 592 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
593 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
594 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
595 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
596 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
597 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
598 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 599
f6f1fc92 600 # Dash mp4 audio
d23028a8
S
601 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
602 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
603 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
604 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
605 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
606 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
607 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
608
609 # Dash webm
d23028a8
S
610 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
611 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
612 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
613 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
614 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
615 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
616 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
617 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
618 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
619 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
620 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
621 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
622 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
623 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
624 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 625 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
626 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
627 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
628 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
629 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
630 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
631 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
632
633 # Dash webm audio
d23028a8
S
634 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
635 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 636
0857baad 637 # Dash webm audio with opus inside
d23028a8
S
638 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
639 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
640 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 641
ce6b9a2d
PH
642 # RTMP (unnamed)
643 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
644
645 # av01 video only formats sometimes served with "unknown" codecs
646 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
647 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
648 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
649 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 650 }
29f7c58a 651 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 652
fd5c4aab
S
653 _GEO_BYPASS = False
654
78caa52a 655 IE_NAME = 'youtube'
2eb88d95
PH
656 _TESTS = [
657 {
2d3d2997 658 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
659 'info_dict': {
660 'id': 'BaW_jenozKc',
661 'ext': 'mp4',
3867038a 662 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
663 'uploader': 'Philipp Hagemeister',
664 'uploader_id': 'phihag',
ec85ded8 665 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
666 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
667 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 668 'upload_date': '20121002',
3867038a 669 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 670 'categories': ['Science & Technology'],
3867038a 671 'tags': ['youtube-dl'],
556dbe7f 672 'duration': 10,
dbdaaa23 673 'view_count': int,
3e7c1224
PH
674 'like_count': int,
675 'dislike_count': int,
7c80519c 676 'start_time': 1,
297a564b 677 'end_time': 9,
2eb88d95 678 }
0e853ca4 679 },
fccd3771 680 {
4bc3a23e
PH
681 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
682 'note': 'Embed-only video (#1746)',
683 'info_dict': {
684 'id': 'yZIXLfi8CZQ',
685 'ext': 'mp4',
686 'upload_date': '20120608',
687 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
688 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
689 'uploader': 'SET India',
94bfcd23 690 'uploader_id': 'setindia',
ec85ded8 691 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 692 'age_limit': 18,
545cc85d 693 },
694 'skip': 'Private video',
fccd3771 695 },
11b56058 696 {
8bdd16b4 697 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
698 'note': 'Use the first video ID in the URL',
699 'info_dict': {
700 'id': 'BaW_jenozKc',
701 'ext': 'mp4',
3867038a 702 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
703 'uploader': 'Philipp Hagemeister',
704 'uploader_id': 'phihag',
ec85ded8 705 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 706 'upload_date': '20121002',
3867038a 707 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 708 'categories': ['Science & Technology'],
3867038a 709 'tags': ['youtube-dl'],
556dbe7f 710 'duration': 10,
dbdaaa23 711 'view_count': int,
11b56058
PM
712 'like_count': int,
713 'dislike_count': int,
34a7de29
S
714 },
715 'params': {
716 'skip_download': True,
717 },
11b56058 718 },
dd27fd17 719 {
2d3d2997 720 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
721 'note': '256k DASH audio (format 141) via DASH manifest',
722 'info_dict': {
723 'id': 'a9LDPn-MO4I',
724 'ext': 'm4a',
725 'upload_date': '20121002',
726 'uploader_id': '8KVIDEO',
ec85ded8 727 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
728 'description': '',
729 'uploader': '8KVIDEO',
730 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 731 },
4bc3a23e
PH
732 'params': {
733 'youtube_include_dash_manifest': True,
734 'format': '141',
4919603f 735 },
de3c7fe0 736 'skip': 'format 141 not served anymore',
dd27fd17 737 },
8bdd16b4 738 # DASH manifest with encrypted signature
739 {
740 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
741 'info_dict': {
742 'id': 'IB3lcPjvWLA',
743 'ext': 'm4a',
744 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
745 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
746 'duration': 244,
747 'uploader': 'AfrojackVEVO',
748 'uploader_id': 'AfrojackVEVO',
749 'upload_date': '20131011',
cc2db878 750 'abr': 129.495,
8bdd16b4 751 },
752 'params': {
753 'youtube_include_dash_manifest': True,
754 'format': '141/bestaudio[ext=m4a]',
755 },
756 },
aa79ac0c
PH
757 # Controversy video
758 {
759 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
760 'info_dict': {
761 'id': 'T4XJQO3qol8',
762 'ext': 'mp4',
556dbe7f 763 'duration': 219,
aa79ac0c 764 'upload_date': '20100909',
4fe54c12 765 'uploader': 'Amazing Atheist',
aa79ac0c 766 'uploader_id': 'TheAmazingAtheist',
ec85ded8 767 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 768 'title': 'Burning Everyone\'s Koran',
545cc85d 769 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 770 }
c522adb1 771 },
dd2d55f1 772 # Normal age-gate video (embed allowed)
c522adb1 773 {
2d3d2997 774 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
775 'info_dict': {
776 'id': 'HtVdAasjOgU',
777 'ext': 'mp4',
778 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 779 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 780 'duration': 142,
c522adb1
JMF
781 'uploader': 'The Witcher',
782 'uploader_id': 'WitcherGame',
ec85ded8 783 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 784 'upload_date': '20140605',
34952f09 785 'age_limit': 18,
c522adb1
JMF
786 },
787 },
8bdd16b4 788 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
789 # YouTube Red ad is not captured for creator
790 {
791 'url': '__2ABJjxzNo',
792 'info_dict': {
793 'id': '__2ABJjxzNo',
794 'ext': 'mp4',
795 'duration': 266,
796 'upload_date': '20100430',
797 'uploader_id': 'deadmau5',
798 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 799 'creator': 'deadmau5',
800 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 801 'uploader': 'deadmau5',
802 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 803 'alt_title': 'Some Chords',
8bdd16b4 804 },
805 'expected_warnings': [
806 'DASH manifest missing',
807 ]
808 },
067aa17e 809 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
810 {
811 'url': 'lqQg6PlCWgI',
812 'info_dict': {
813 'id': 'lqQg6PlCWgI',
814 'ext': 'mp4',
556dbe7f 815 'duration': 6085,
90227264 816 'upload_date': '20150827',
cbe2bd91 817 'uploader_id': 'olympic',
ec85ded8 818 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 819 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 820 'uploader': 'Olympic',
cbe2bd91
PH
821 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
822 },
823 'params': {
824 'skip_download': 'requires avconv',
e52a40ab 825 }
cbe2bd91 826 },
6271f1ca
PH
827 # Non-square pixels
828 {
829 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
830 'info_dict': {
831 'id': '_b-2C3KPAM0',
832 'ext': 'mp4',
833 'stretched_ratio': 16 / 9.,
556dbe7f 834 'duration': 85,
6271f1ca
PH
835 'upload_date': '20110310',
836 'uploader_id': 'AllenMeow',
ec85ded8 837 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 838 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 839 'uploader': '孫ᄋᄅ',
6271f1ca
PH
840 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
841 },
06b491eb
S
842 },
843 # url_encoded_fmt_stream_map is empty string
844 {
845 'url': 'qEJwOuvDf7I',
846 'info_dict': {
847 'id': 'qEJwOuvDf7I',
f57b7835 848 'ext': 'webm',
06b491eb
S
849 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
850 'description': '',
851 'upload_date': '20150404',
852 'uploader_id': 'spbelect',
853 'uploader': 'Наблюдатели Петербурга',
854 },
855 'params': {
856 'skip_download': 'requires avconv',
e323cf3f
S
857 },
858 'skip': 'This live event has ended.',
06b491eb 859 },
067aa17e 860 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
861 {
862 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
863 'info_dict': {
864 'id': 'FIl7x6_3R5Y',
eb6793ba 865 'ext': 'webm',
da77d856
S
866 'title': 'md5:7b81415841e02ecd4313668cde88737a',
867 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 868 'duration': 220,
da77d856
S
869 'upload_date': '20150625',
870 'uploader_id': 'dorappi2000',
ec85ded8 871 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 872 'uploader': 'dorappi2000',
eb6793ba 873 'formats': 'mincount:31',
da77d856 874 },
eb6793ba 875 'skip': 'not actual anymore',
2ee8f5d8 876 },
8a1a26ce
YCH
877 # DASH manifest with segment_list
878 {
879 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
880 'md5': '8ce563a1d667b599d21064e982ab9e31',
881 'info_dict': {
882 'id': 'CsmdDsKjzN8',
883 'ext': 'mp4',
17ee98e1 884 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
885 'uploader': 'Airtek',
886 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
887 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
888 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
889 },
890 'params': {
891 'youtube_include_dash_manifest': True,
892 'format': '135', # bestvideo
be49068d
S
893 },
894 'skip': 'This live event has ended.',
2ee8f5d8 895 },
cf7e015f
S
896 {
897 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 898 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 899 'info_dict': {
545cc85d 900 'id': 'jvGDaLqkpTg',
901 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
902 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
903 },
904 'playlist': [{
905 'info_dict': {
545cc85d 906 'id': 'jvGDaLqkpTg',
cf7e015f 907 'ext': 'mp4',
545cc85d 908 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
909 'description': 'md5:e03b909557865076822aa169218d6a5d',
910 'duration': 10643,
911 'upload_date': '20161111',
912 'uploader': 'Team PGP',
913 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
914 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
915 },
916 }, {
917 'info_dict': {
545cc85d 918 'id': '3AKt1R1aDnw',
cf7e015f 919 'ext': 'mp4',
545cc85d 920 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
921 'description': 'md5:e03b909557865076822aa169218d6a5d',
922 'duration': 10991,
923 'upload_date': '20161111',
924 'uploader': 'Team PGP',
925 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
926 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
927 },
928 }, {
929 'info_dict': {
545cc85d 930 'id': 'RtAMM00gpVc',
cf7e015f 931 'ext': 'mp4',
545cc85d 932 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
933 'description': 'md5:e03b909557865076822aa169218d6a5d',
934 'duration': 10995,
935 'upload_date': '20161111',
936 'uploader': 'Team PGP',
937 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
938 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
939 },
940 }, {
941 'info_dict': {
545cc85d 942 'id': '6N2fdlP3C5U',
cf7e015f 943 'ext': 'mp4',
545cc85d 944 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
945 'description': 'md5:e03b909557865076822aa169218d6a5d',
946 'duration': 10990,
947 'upload_date': '20161111',
948 'uploader': 'Team PGP',
949 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
950 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
951 },
952 }],
953 'params': {
954 'skip_download': True,
955 },
cbaed4bb 956 },
f9f49d87 957 {
067aa17e 958 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
959 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
960 'info_dict': {
961 'id': 'gVfLd0zydlo',
962 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
963 },
964 'playlist_count': 2,
be49068d 965 'skip': 'Not multifeed anymore',
f9f49d87 966 },
cbaed4bb 967 {
2d3d2997 968 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 969 'only_matching': True,
0e49d9a6 970 },
6d4fc66b 971 {
2d3d2997 972 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
973 'only_matching': True,
974 },
0e49d9a6 975 {
067aa17e 976 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 977 # Also tests cut-off URL expansion in video description (see
067aa17e
S
978 # https://github.com/ytdl-org/youtube-dl/issues/1892,
979 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
980 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
981 'info_dict': {
982 'id': 'lsguqyKfVQg',
983 'ext': 'mp4',
984 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 985 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 986 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 987 'duration': 133,
0e49d9a6
LL
988 'upload_date': '20151119',
989 'uploader_id': 'IronSoulElf',
ec85ded8 990 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 991 'uploader': 'IronSoulElf',
eb6793ba
S
992 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
993 'track': 'Dark Walk - Position Music',
994 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 995 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
996 },
997 'params': {
998 'skip_download': True,
999 },
1000 },
61f92af1 1001 {
067aa17e 1002 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1003 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1004 'only_matching': True,
1005 },
313dfc45
LL
1006 {
1007 # Video with yt:stretch=17:0
1008 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1009 'info_dict': {
1010 'id': 'Q39EVAstoRM',
1011 'ext': 'mp4',
1012 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1013 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1014 'upload_date': '20151107',
1015 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1016 'uploader': 'CH GAMER DROID',
1017 },
1018 'params': {
1019 'skip_download': True,
1020 },
be49068d 1021 'skip': 'This video does not exist.',
313dfc45 1022 },
201c1459 1023 {
1024 # Video with incomplete 'yt:stretch=16:'
1025 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1026 'only_matching': True,
1027 },
7caf9830
S
1028 {
1029 # Video licensed under Creative Commons
1030 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1031 'info_dict': {
1032 'id': 'M4gD1WSo5mA',
1033 'ext': 'mp4',
1034 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1035 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1036 'duration': 721,
7caf9830
S
1037 'upload_date': '20150127',
1038 'uploader_id': 'BerkmanCenter',
ec85ded8 1039 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1040 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1041 'license': 'Creative Commons Attribution license (reuse allowed)',
1042 },
1043 'params': {
1044 'skip_download': True,
1045 },
1046 },
fd050249
S
1047 {
1048 # Channel-like uploader_url
1049 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1050 'info_dict': {
1051 'id': 'eQcmzGIKrzg',
1052 'ext': 'mp4',
1053 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1054 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1055 'duration': 4060,
fd050249 1056 'upload_date': '20151119',
eb6793ba 1057 'uploader': 'Bernie Sanders',
fd050249 1058 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1059 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1060 'license': 'Creative Commons Attribution license (reuse allowed)',
1061 },
1062 'params': {
1063 'skip_download': True,
1064 },
1065 },
040ac686
S
1066 {
1067 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1068 'only_matching': True,
7f29cf54
S
1069 },
1070 {
067aa17e 1071 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1072 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1073 'only_matching': True,
6496ccb4
S
1074 },
1075 {
1076 # Rental video preview
1077 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1078 'info_dict': {
1079 'id': 'uGpuVWrhIzE',
1080 'ext': 'mp4',
1081 'title': 'Piku - Trailer',
1082 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1083 'upload_date': '20150811',
1084 'uploader': 'FlixMatrix',
1085 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1086 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1087 'license': 'Standard YouTube License',
1088 },
1089 'params': {
1090 'skip_download': True,
1091 },
eb6793ba 1092 'skip': 'This video is not available.',
022a5d66 1093 },
12afdc2a
S
1094 {
1095 # YouTube Red video with episode data
1096 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1097 'info_dict': {
1098 'id': 'iqKdEhx-dD4',
1099 'ext': 'mp4',
1100 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1101 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1102 'duration': 2085,
12afdc2a
S
1103 'upload_date': '20170118',
1104 'uploader': 'Vsauce',
1105 'uploader_id': 'Vsauce',
1106 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1107 'series': 'Mind Field',
1108 'season_number': 1,
1109 'episode_number': 1,
1110 },
1111 'params': {
1112 'skip_download': True,
1113 },
1114 'expected_warnings': [
1115 'Skipping DASH manifest',
1116 ],
1117 },
c7121fa7
S
1118 {
1119 # The following content has been identified by the YouTube community
1120 # as inappropriate or offensive to some audiences.
1121 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1122 'info_dict': {
1123 'id': '6SJNVb0GnPI',
1124 'ext': 'mp4',
1125 'title': 'Race Differences in Intelligence',
1126 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1127 'duration': 965,
1128 'upload_date': '20140124',
1129 'uploader': 'New Century Foundation',
1130 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1131 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1132 },
1133 'params': {
1134 'skip_download': True,
1135 },
545cc85d 1136 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1137 },
022a5d66
S
1138 {
1139 # itag 212
1140 'url': '1t24XAntNCY',
1141 'only_matching': True,
fd5c4aab
S
1142 },
1143 {
1144 # geo restricted to JP
1145 'url': 'sJL6WA-aGkQ',
1146 'only_matching': True,
1147 },
cd5a74a2
S
1148 {
1149 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1150 'only_matching': True,
1151 },
bc2ca1bb 1152 {
1153 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1154 'only_matching': True,
1155 },
1156 {
1157 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1158 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1159 'only_matching': True,
1160 },
825cd268
RA
1161 {
1162 # DRM protected
1163 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1164 'only_matching': True,
4fe54c12
S
1165 },
1166 {
1167 # Video with unsupported adaptive stream type formats
1168 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1169 'info_dict': {
1170 'id': 'Z4Vy8R84T1U',
1171 'ext': 'mp4',
1172 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1173 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1174 'duration': 433,
1175 'upload_date': '20130923',
1176 'uploader': 'Amelia Putri Harwita',
1177 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1178 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1179 'formats': 'maxcount:10',
1180 },
1181 'params': {
1182 'skip_download': True,
1183 'youtube_include_dash_manifest': False,
1184 },
5429d6a9 1185 'skip': 'not actual anymore',
5caabd3c 1186 },
1187 {
822b9d9c 1188 # Youtube Music Auto-generated description
5caabd3c 1189 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1190 'info_dict': {
1191 'id': 'MgNrAu2pzNs',
1192 'ext': 'mp4',
1193 'title': 'Voyeur Girl',
1194 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1195 'upload_date': '20190312',
5429d6a9
S
1196 'uploader': 'Stephen - Topic',
1197 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1198 'artist': 'Stephen',
1199 'track': 'Voyeur Girl',
1200 'album': 'it\'s too much love to know my dear',
1201 'release_date': '20190313',
1202 'release_year': 2019,
1203 },
1204 'params': {
1205 'skip_download': True,
1206 },
1207 },
66b48727
RA
1208 {
1209 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1210 'only_matching': True,
1211 },
011e75e6
S
1212 {
1213 # invalid -> valid video id redirection
1214 'url': 'DJztXj2GPfl',
1215 'info_dict': {
1216 'id': 'DJztXj2GPfk',
1217 'ext': 'mp4',
1218 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1219 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1220 'upload_date': '20090125',
1221 'uploader': 'Prochorowka',
1222 'uploader_id': 'Prochorowka',
1223 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1224 'artist': 'Panjabi MC',
1225 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1226 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1227 },
1228 'params': {
1229 'skip_download': True,
1230 },
545cc85d 1231 'skip': 'Video unavailable',
ea74e00b
DP
1232 },
1233 {
1234 # empty description results in an empty string
1235 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1236 'info_dict': {
1237 'id': 'x41yOUIvK2k',
1238 'ext': 'mp4',
1239 'title': 'IMG 3456',
1240 'description': '',
1241 'upload_date': '20170613',
1242 'uploader_id': 'ElevageOrVert',
1243 'uploader': 'ElevageOrVert',
1244 },
1245 'params': {
1246 'skip_download': True,
1247 },
1248 },
a0566bbf 1249 {
29f7c58a 1250 # with '};' inside yt initial data (see [1])
1251 # see [2] for an example with '};' inside ytInitialPlayerResponse
1252 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1253 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1254 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1255 'info_dict': {
1256 'id': 'CHqg6qOn4no',
1257 'ext': 'mp4',
1258 'title': 'Part 77 Sort a list of simple types in c#',
1259 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1260 'upload_date': '20130831',
1261 'uploader_id': 'kudvenkat',
1262 'uploader': 'kudvenkat',
1263 },
1264 'params': {
1265 'skip_download': True,
1266 },
1267 },
29f7c58a 1268 {
1269 # another example of '};' in ytInitialData
1270 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1271 'only_matching': True,
1272 },
1273 {
1274 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1275 'only_matching': True,
1276 },
545cc85d 1277 {
cc2db878 1278 # https://github.com/ytdl-org/youtube-dl/pull/28094
1279 'url': 'OtqTfy26tG0',
1280 'info_dict': {
1281 'id': 'OtqTfy26tG0',
1282 'ext': 'mp4',
1283 'title': 'Burn Out',
1284 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1285 'upload_date': '20141120',
1286 'uploader': 'The Cinematic Orchestra - Topic',
1287 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1288 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1289 'artist': 'The Cinematic Orchestra',
1290 'track': 'Burn Out',
1291 'album': 'Every Day',
1292 'release_data': None,
1293 'release_year': None,
1294 },
1295 'params': {
1296 'skip_download': True,
1297 },
545cc85d 1298 },
bc2ca1bb 1299 {
1300 # controversial video, only works with bpctr when authenticated with cookies
1301 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1302 'only_matching': True,
1303 },
f7ad7160 1304 {
1305 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1306 'url': 'cBvYw8_A0vQ',
1307 'info_dict': {
1308 'id': 'cBvYw8_A0vQ',
1309 'ext': 'mp4',
1310 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1311 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1312 'upload_date': '20201120',
1313 'uploader': 'Walk around Japan',
1314 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1315 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1316 },
1317 'params': {
1318 'skip_download': True,
1319 },
0fb983f6 1320 }, {
1321 # Has multiple audio streams
1322 'url': 'WaOKSUlf4TM',
1323 'only_matching': True
9297939e 1324 }, {
1325 # Requires Premium: has format 141 when requested using YTM url
1326 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1327 'only_matching': True
1328 }, {
120916da 1329 # multiple subtitles with same lang_code
1330 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1331 'only_matching': True,
1332 },
2eb88d95
PH
1333 ]
1334
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Returns False when the URL's query string carries a non-empty ``list``
    parameter; otherwise the decision is delegated to the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if not list_values[0]:
        return super(YoutubeIE, cls).suitable(url)
    return False
1344
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create its empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, keyed by (player URL, signature cache id)
    self._player_cache = {}
    # Downloaded player code, keyed by player id
    self._code_cache = {}
e0df6211 1349
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return a string representation of a signature

    The result is the length of each dot-separated part of *example_sig*,
    joined with dots again (e.g. 'ab.cde' gives '2.3').
    """
    part_lengths = [
        compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1353
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id found in *player_url*.

    Each pattern in ``cls._PLAYER_INFO_RE`` is tried in order; the ``id``
    group of the first one that matches is returned.

    Raises ExtractorError if no pattern matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c
S
1363
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature string.

    A previously stored index list is loaded from the 'youtube-sigfuncs'
    cache when available. Otherwise the player code is downloaded (at most
    once per player id for this instance), parsed into a signature
    function, and the index list derived from it is stored in the cache.

    video_id is only passed through to the download helper; example_sig
    determines the cache id and the length of the probe string.

    Raises ExtractorError if the player cannot be identified or if the
    computed cache id is not a plain file name.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    # Explicit check instead of `assert`: assert statements are removed
    # when Python runs with -O, which would silently drop this guard
    if os.path.basename(func_id) != func_id:
        raise ExtractorError(
            'Invalid signature function cache id %r' % func_id)

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        return lambda s: ''.join(s[i] for i in cache_spec)

    if player_id not in self._code_cache:
        self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    code = self._code_cache[player_id]
    res = self._parse_sig_js(code)

    # Probe the function with a string whose i-th character has code
    # point i; the code points of the output then give, for every output
    # position, the input index it was taken from
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res
1390
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    func is run on a probe string as long as example_sig whose i-th
    character has code point i. The resulting index list is rendered as a
    sum of ``s[...]`` index/slice expressions, wrapped in an ``if`` on the
    part lengths of the signature, and written out with to_screen().
    """
    def gen_sig_code(idxs):
        """Yield 's[...]' expressions that together reproduce idxs."""
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # end is inclusive here; a negative stop means "run to the edge"
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        if len(idxs) < 2:
            # With fewer than two indices the pairwise zip() below yields
            # nothing and `i` would be unbound in the final yield
            for idx in idxs:
                yield 's[%d]' % idx
            return

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Still inside the current run
                    continue
                # Run ended at prev; emit it and start over from i
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new ascending/descending run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1429
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a Python callable from the signature function in *jscode*.

    The function name is located with the first matching pattern below
    (group ``sig``), the function is extracted with JSInterpreter, and a
    wrapper taking the signature string as its only argument is returned.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # (identical to the previous pattern)
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    sig_func_name = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    js_function = JSInterpreter(jscode).extract_function(sig_func_name)
    # The extracted function takes its arguments as a list
    return lambda s: js_function([s])
1453
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature

    Signature functions are kept in self._player_cache, keyed by the
    normalised player URL together with the signature cache id of *s*.

    Raises ExtractorError if player_url is None, or (wrapping the
    original exception and its traceback) if anything fails while
    obtaining or applying the signature function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Make protocol-relative and site-relative player URLs absolute
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(),
            cause=e)
e0df6211 1480
def _mark_watched(self, video_id, player_response):
    """Request the playback tracking URL found in *player_response*.

    Does nothing when the response carries no valid
    playbackTracking/videostatsPlaybackUrl/baseUrl. Otherwise ``ver`` and
    a random ``cpn`` are set in the URL's query string and the URL is
    fetched non-fatally.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(
        url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1505
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return a list of YouTube embeds found in *webpage*.

    Three sources are scanned, in this order:
    - quoted youtube(-nocookie).com embed/v/p URLs following an iframe,
      embed, object, data-video-url, embedSWF or SWFObject construct
      (HTML-unescaped URLs are returned);
    - ``data-youtube-id`` values of lazyYT elements (HTML-unescaped);
    - ``data-video_id`` values of yvii_single_video_player divs.

    The list is empty when nothing matches.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read `embedSWF\(?:\s*`, which
    # demands a literal ':' and therefore never matched embedSWF("...")
    # calls; the colon is optional now, so everything matched before
    # still matches.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:?\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(video_id)
        for video_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns (q1, q2, id) tuples; the id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
1537
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by YoutubeIE._extract_urls, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1542
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second capture group of cls._VALID_URL matched against url.

    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1550
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build the chapter list from the player-bar section of data.

    Each chapter ends where the next one starts; the last chapter ends at
    duration. Entries whose start or end time cannot be determined are
    dropped. Returns None when data carries no chapter list.
    """
    markers = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not markers:
        return

    def start_of(marker):
        # Start offset is given in milliseconds; convert to seconds
        millis = try_get(
            marker,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(markers)
    result = []
    for index, marker in enumerate(markers):
        begin = start_of(marker)
        if begin is None:
            continue
        following = index + 1
        if following < total:
            finish = start_of(markers[following])
        else:
            finish = duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                marker, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1590
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find a JSON blob in webpage using regex and parse it non-fatally.

    The pattern anchored by _YT_INITIAL_BOUNDARY_RE is tried first, then
    the bare regex; '{}' is parsed when neither matches.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1595
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns None when the text has fewer than three space-separated parts.
    """
    parts = time_text.split(' ')
    if len(parts) < 3:
        return None
    amount, unit = parts[0], parts[1]
    return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
1605
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the non-empty 'text' strings of the dicts in runs.

    Non-dict items and items without a string 'text' are ignored.
    Returns None when nothing was collected.
    """
    pieces = []
    for run in runs:
        if not isinstance(run, dict):
            continue
        piece = try_get(run, lambda x: x['text'], compat_str)
        if piece:
            pieces.append(piece)
    return ''.join(pieces) if pieces else None
1619
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    comment_renderer is the dict to read; parent is the id of the comment
    this one replies to ('root' is stored when it is falsy).
    Returns None when the renderer has no 'commentId'. 'timestamp' is None
    when the published-time text is missing or cannot be split into at
    least 'X units ago'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)
    # time_text is None when publishedTimeText is absent, and
    # parse_time_text returns None for texts it cannot split; neither case
    # should abort extraction of the whole comment.
    published = self.parse_time_text(time_text) if time_text else None
    timestamp = (
        calendar.timegm(published.timetuple())
        if published is not None else None)
    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                  lambda x: x['likeCount']), compat_str)) or 0
    # The last entry of the thumbnails list is the one used
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_liked,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
1652
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generator over the comments reachable from root_continuation_data.

    Yields comment dicts as produced by _extract_comment. On the first
    top-level page it may additionally yield one int: the estimated total
    number of comments.

    root_continuation_data: renderer from which the first continuation is
        extracted.
    identity_token, account_syncid, ytcfg: forwarded to
        _generate_api_headers for every request.
    session_token_list: one-element list holding the current session
        token; it is updated in place whenever a response carries a new
        'xsrf_token', so recursive calls share the refreshed token.
    parent: id of the comment whose replies are being fetched, or None
        for top-level comments.
    comment_counts: shared mutable list
        [comments so far, estimated total, current thread number];
        created here when not supplied.

    Raises ExtractorError when a page cannot be downloaded after the
    configured number of retries or when the response reports an error.
    """

    def extract_thread(parent_renderer):
        # Yields every comment in parent_renderer['contents'], followed
        # immediately by its replies (fetched through a recursive call).
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            # Top-level page: restart the reply-thread counter
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # Top-level comments are wrapped in a commentThreadRenderer,
            # replies come as a bare commentRenderer
            comment_renderer = try_get(
                comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                content, (lambda x: x['commentRenderer'], dict))

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    # Only the very first top-level page carries the header (comment
    # count and sort menu) that is inspected further below
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self.get_param('extractor_retries', 3)
        # count starts at -1 so that the first attempt is attempt #0
        count = -1
        last_error = None

        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                # Progress note shown while downloading this page
                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                # Any other error, or retries exhausted: propagate
                raise
            else:
                # Keep the shared session token up to date
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                # The response is looked up either directly on the
                # returned object or on its element at index 1
                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        # Prefer the visitor data returned by the server for later requests
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        # Only the first known renderer is processed (every path through
        # the loop body below ends in a break).
        # NOTE(review): if no key is known, `continuation` is left
        # unchanged and the same page is requested again - confirm this
        # cannot happen in practice.
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                    # Hand the estimate to the caller as a bare value
                    yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    # Restart from the sorted continuation without
                    # yielding the comments of this page
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
1837
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
    """Entry for comment extraction

    Drains _comment_entries for the 'itemSectionRenderer' of each entry in
    contents and returns a dict with 'comments' and 'comment_count'.
    """
    renderer_keys = ('itemSectionRenderer',)
    collected = []
    total_estimate = 0
    for section in contents:
        for name, renderer in section.items():
            if name not in renderer_keys:
                continue

            entries = self._comment_entries(
                renderer,
                identity_token=self._extract_identity_token(webpage, item_id=video_id),
                account_syncid=self._extract_account_syncid(ytcfg),
                ytcfg=ytcfg,
                session_token_list=[xsrf_token])

            for item in entries:
                # An int among the entries is the estimated total
                if isinstance(item, int):
                    total_estimate = item
                else:
                    collected.append(item)
            # At most one matching renderer is processed per section
            break
    downloaded = len(collected)
    self.to_screen('Downloaded %d/%d comments' % (downloaded, total_estimate))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
1868
c5e8d7af 1869 def _real_extract(self, url):
cf7e015f 1870 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1871 video_id = self._match_id(url)
9297939e 1872
1873 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
1874
545cc85d 1875 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1876 webpage_url = base_url + 'watch?v=' + video_id
1877 webpage = self._download_webpage(
cce889b9 1878 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1879
9297939e 1880 def get_text(x):
1881 if not x:
1882 return
1883 text = x.get('simpleText')
1884 if text and isinstance(text, compat_str):
1885 return text
1886 runs = x.get('runs')
1887 if not isinstance(runs, list):
1888 return
1889 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
1890
1891 ytm_streaming_data = {}
1892 if is_music_url:
1893 # we are forcing to use parse_json because 141 only appeared in get_video_info.
1894 # el, c, cver, cplayer field required for 141(aac 256kbps) codec
1895 # maybe paramter of youtube music player?
1896 ytm_player_response = self._parse_json(try_get(compat_parse_qs(
1897 self._download_webpage(
1898 base_url + 'get_video_info', video_id,
fe03a6cd 1899 'Fetching youtube music info webpage',
1900 'unable to download youtube music info webpage', query={
9297939e 1901 'video_id': video_id,
1902 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1903 'el': 'detailpage',
1904 'c': 'WEB_REMIX',
1905 'cver': '0.1',
00ae2769 1906 'cplayer': 'UNIPLAYER',
1907 'html5': '1',
9297939e 1908 }, fatal=False)),
1909 lambda x: x['player_response'][0],
1910 compat_str) or '{}', video_id)
1911 ytm_streaming_data = ytm_player_response.get('streamingData') or {}
1912
545cc85d 1913 player_response = None
1914 if webpage:
1915 player_response = self._extract_yt_initial_variable(
1916 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1917 video_id, 'initial player response')
f4f751af 1918
1919 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1920 if not player_response:
1921 player_response = self._call_api(
f4f751af 1922 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1923
1924 playability_status = player_response.get('playabilityStatus') or {}
1925 if playability_status.get('reason') == 'Sign in to confirm your age':
1926 pr = self._parse_json(try_get(compat_parse_qs(
1927 self._download_webpage(
1928 base_url + 'get_video_info', video_id,
1929 'Refetching age-gated info webpage',
1930 'unable to download video info webpage', query={
1931 'video_id': video_id,
7c60c33e 1932 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
00ae2769 1933 'html5': '1',
545cc85d 1934 }, fatal=False)),
1935 lambda x: x['player_response'][0],
1936 compat_str) or '{}', video_id)
1937 if pr:
1938 player_response = pr
1939
1940 trailer_video_id = try_get(
1941 playability_status,
1942 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1943 compat_str)
1944 if trailer_video_id:
1945 return self.url_result(
1946 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1947
545cc85d 1948 search_meta = (
1949 lambda x: self._html_search_meta(x, webpage, default=None)) \
1950 if webpage else lambda x: None
dbdaaa23 1951
545cc85d 1952 video_details = player_response.get('videoDetails') or {}
37357d21 1953 microformat = try_get(
545cc85d 1954 player_response,
1955 lambda x: x['microformat']['playerMicroformatRenderer'],
1956 dict) or {}
1957 video_title = video_details.get('title') \
1958 or get_text(microformat.get('title')) \
1959 or search_meta(['og:title', 'twitter:title', 'title'])
1960 video_description = video_details.get('shortDescription')
cf7e015f 1961
8fe10494 1962 if not smuggled_data.get('force_singlefeed', False):
a06916d9 1963 if not self.get_param('noplaylist'):
8fe10494
S
1964 multifeed_metadata_list = try_get(
1965 player_response,
1966 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1967 compat_str)
8fe10494
S
1968 if multifeed_metadata_list:
1969 entries = []
1970 feed_ids = []
1971 for feed in multifeed_metadata_list.split(','):
1972 # Unquote should take place before split on comma (,) since textual
1973 # fields may contain comma as well (see
067aa17e 1974 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1975 feed_data = compat_parse_qs(
1976 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1977
1978 def feed_entry(name):
545cc85d 1979 return try_get(
1980 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1981
1982 feed_id = feed_entry('id')
1983 if not feed_id:
1984 continue
1985 feed_title = feed_entry('title')
1986 title = video_title
1987 if feed_title:
1988 title += ' (%s)' % feed_title
8fe10494
S
1989 entries.append({
1990 '_type': 'url_transparent',
1991 'ie_key': 'Youtube',
1992 'url': smuggle_url(
545cc85d 1993 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1994 {'force_singlefeed': True}),
6b09401b 1995 'title': title,
8fe10494 1996 })
6b09401b 1997 feed_ids.append(feed_id)
8fe10494
S
1998 self.to_screen(
1999 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2000 % (', '.join(feed_ids), video_id))
545cc85d 2001 return self.playlist_result(
2002 entries, video_id, video_title, video_description)
8fe10494
S
2003 else:
2004 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2005
9297939e 2006 formats, itags, stream_ids = [], [], []
cc2db878 2007 itag_qualities = {}
545cc85d 2008 player_url = None
d3fc8074 2009 q = qualities([
2010 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2011 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2012 ])
9297939e 2013
545cc85d 2014 streaming_data = player_response.get('streamingData') or {}
2015 streaming_formats = streaming_data.get('formats') or []
2016 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2017 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2018 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2019
545cc85d 2020 for fmt in streaming_formats:
2021 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2022 continue
321bf820 2023
cc2db878 2024 itag = str_or_none(fmt.get('itag'))
9297939e 2025 audio_track = fmt.get('audioTrack') or {}
2026 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2027 if stream_id in stream_ids:
2028 continue
2029
cc2db878 2030 quality = fmt.get('quality')
d3fc8074 2031 if quality == 'tiny' or not quality:
2032 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2033 if itag and quality:
2034 itag_qualities[itag] = quality
2035 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2036 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2037 # number of fragment that would subsequently requested with (`&sq=N`)
2038 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2039 continue
2040
545cc85d 2041 fmt_url = fmt.get('url')
2042 if not fmt_url:
2043 sc = compat_parse_qs(fmt.get('signatureCipher'))
2044 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2045 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2046 if not (sc and fmt_url and encrypted_sig):
2047 continue
2048 if not player_url:
2049 if not webpage:
2050 continue
2051 player_url = self._search_regex(
2052 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
2053 webpage, 'player URL', fatal=False)
2054 if not player_url:
201e9eaa 2055 continue
545cc85d 2056 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2057 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2058 fmt_url += '&' + sp + '=' + signature
2059
545cc85d 2060 if itag:
2061 itags.append(itag)
9297939e 2062 stream_ids.append(stream_id)
2063
cc2db878 2064 tbr = float_or_none(
2065 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2066 dct = {
2067 'asr': int_or_none(fmt.get('audioSampleRate')),
2068 'filesize': int_or_none(fmt.get('contentLength')),
2069 'format_id': itag,
0fb983f6 2070 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2071 'fps': int_or_none(fmt.get('fps')),
2072 'height': int_or_none(fmt.get('height')),
dca3ff4a 2073 'quality': q(quality),
cc2db878 2074 'tbr': tbr,
545cc85d 2075 'url': fmt_url,
2076 'width': fmt.get('width'),
0fb983f6 2077 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2078 }
2079 mimetype = fmt.get('mimeType')
2080 if mimetype:
2081 mobj = re.match(
2082 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2083 if mobj:
2084 dct['ext'] = mimetype2ext(mobj.group(1))
2085 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2086 no_audio = dct.get('acodec') == 'none'
2087 no_video = dct.get('vcodec') == 'none'
2088 if no_audio:
2089 dct['vbr'] = tbr
2090 if no_video:
2091 dct['abr'] = tbr
2092 if no_audio or no_video:
545cc85d 2093 dct['downloader_options'] = {
2094 # Youtube throttles chunks >~10M
2095 'http_chunk_size': 10485760,
bf1317d2 2096 }
7c60c33e 2097 if dct.get('ext'):
2098 dct['container'] = dct['ext'] + '_dash'
545cc85d 2099 formats.append(dct)
2100
9297939e 2101 for sd in (streaming_data, ytm_streaming_data):
2102 hls_manifest_url = sd.get('hlsManifestUrl')
2103 if hls_manifest_url:
2104 for f in self._extract_m3u8_formats(
2105 hls_manifest_url, video_id, 'mp4', fatal=False):
2106 itag = self._search_regex(
2107 r'/itag/(\d+)', f['url'], 'itag', default=None)
2108 if itag:
2109 f['format_id'] = itag
545cc85d 2110 formats.append(f)
2111
a06916d9 2112 if self.get_param('youtube_include_dash_manifest', True):
9297939e 2113 for sd in (streaming_data, ytm_streaming_data):
2114 dash_manifest_url = sd.get('dashManifestUrl')
2115 if dash_manifest_url:
2116 for f in self._extract_mpd_formats(
2117 dash_manifest_url, video_id, fatal=False):
2118 itag = f['format_id']
2119 if itag in itags:
2120 continue
2121 if itag in itag_qualities:
9297939e 2122 f['quality'] = q(itag_qualities[itag])
2123 filesize = int_or_none(self._search_regex(
2124 r'/clen/(\d+)', f.get('fragment_base_url')
2125 or f['url'], 'file size', default=None))
2126 if filesize:
2127 f['filesize'] = filesize
2128 formats.append(f)
bf1317d2 2129
545cc85d 2130 if not formats:
a06916d9 2131 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2132 self.raise_no_formats(
545cc85d 2133 'This video is DRM protected.', expected=True)
2134 pemr = try_get(
2135 playability_status,
2136 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2137 dict) or {}
2138 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2139 subreason = pemr.get('subreason')
2140 if subreason:
2141 subreason = clean_html(get_text(subreason))
2142 if subreason == 'The uploader has not made this video available in your country.':
2143 countries = microformat.get('availableCountries')
2144 if not countries:
2145 regions_allowed = search_meta('regionsAllowed')
2146 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2147 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2148 reason += '\n' + subreason
2149 if reason:
b7da73eb 2150 self.raise_no_formats(reason, expected=True)
bf1317d2 2151
545cc85d 2152 self._sort_formats(formats)
bf1317d2 2153
545cc85d 2154 keywords = video_details.get('keywords') or []
2155 if not keywords and webpage:
2156 keywords = [
2157 unescapeHTML(m.group('content'))
2158 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2159 for keyword in keywords:
2160 if keyword.startswith('yt:stretch='):
201c1459 2161 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2162 if mobj:
2163 # NB: float is intentional for forcing float division
2164 w, h = (float(v) for v in mobj.groups())
2165 if w > 0 and h > 0:
2166 ratio = w / h
2167 for f in formats:
2168 if f.get('vcodec') != 'none':
2169 f['stretched_ratio'] = ratio
2170 break
6449cd80 2171
545cc85d 2172 thumbnails = []
2173 for container in (video_details, microformat):
2174 for thumbnail in (try_get(
2175 container,
2176 lambda x: x['thumbnail']['thumbnails'], list) or []):
2177 thumbnail_url = thumbnail.get('url')
2178 if not thumbnail_url:
bf1317d2 2179 continue
1988fab7 2180 # Sometimes youtube gives a wrong thumbnail URL. See:
2181 # https://github.com/yt-dlp/yt-dlp/issues/233
2182 # https://github.com/ytdl-org/youtube-dl/issues/28023
2183 if 'maxresdefault' in thumbnail_url:
2184 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2185 thumbnails.append({
545cc85d 2186 'url': thumbnail_url,
ff2751ac 2187 'height': int_or_none(thumbnail.get('height')),
545cc85d 2188 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2189 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2190 })
ff2751ac 2191 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2192 if thumbnail_url:
2193 thumbnails.append({
2194 'url': thumbnail_url,
2195 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2196 })
2197 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2198 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2199 thumbnails.append({
2200 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2201 'preference': 1,
2202 })
2203 self._remove_duplicate_formats(thumbnails)
545cc85d 2204
2205 category = microformat.get('category') or search_meta('genre')
2206 channel_id = video_details.get('channelId') \
2207 or microformat.get('externalChannelId') \
2208 or search_meta('channelId')
2209 duration = int_or_none(
2210 video_details.get('lengthSeconds')
2211 or microformat.get('lengthSeconds')) \
2212 or parse_duration(search_meta('duration'))
2213 is_live = video_details.get('isLive')
2214 owner_profile_url = microformat.get('ownerProfileUrl')
2215
2216 info = {
2217 'id': video_id,
2218 'title': self._live_title(video_title) if is_live else video_title,
2219 'formats': formats,
2220 'thumbnails': thumbnails,
2221 'description': video_description,
2222 'upload_date': unified_strdate(
2223 microformat.get('uploadDate')
2224 or search_meta('uploadDate')),
2225 'uploader': video_details['author'],
2226 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2227 'uploader_url': owner_profile_url,
2228 'channel_id': channel_id,
2229 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2230 'duration': duration,
2231 'view_count': int_or_none(
2232 video_details.get('viewCount')
2233 or microformat.get('viewCount')
2234 or search_meta('interactionCount')),
2235 'average_rating': float_or_none(video_details.get('averageRating')),
2236 'age_limit': 18 if (
2237 microformat.get('isFamilySafe') is False
2238 or search_meta('isFamilyFriendly') == 'false'
2239 or search_meta('og:restrictions:age') == '18+') else 0,
2240 'webpage_url': webpage_url,
2241 'categories': [category] if category else None,
2242 'tags': keywords,
2243 'is_live': is_live,
2244 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2245 'was_live': video_details.get('isLiveContent'),
545cc85d 2246 }
b477fc13 2247
545cc85d 2248 pctr = try_get(
2249 player_response,
2250 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2251 subtitles = {}
2252 if pctr:
774d79cc 2253 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2254 lang_subs = container.setdefault(lang_code, [])
545cc85d 2255 for fmt in self._SUBTITLE_FORMATS:
2256 query.update({
2257 'fmt': fmt,
2258 })
2259 lang_subs.append({
2260 'ext': fmt,
2261 'url': update_url_query(base_url, query),
774d79cc 2262 'name': sub_name,
545cc85d 2263 })
7e72694b 2264
545cc85d 2265 for caption_track in (pctr.get('captionTracks') or []):
2266 base_url = caption_track.get('baseUrl')
2267 if not base_url:
2268 continue
2269 if caption_track.get('kind') != 'asr':
120916da 2270 lang_code = (
2271 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2272 or caption_track.get('languageCode'))
545cc85d 2273 if not lang_code:
2274 continue
2275 process_language(
774d79cc 2276 subtitles, base_url, lang_code,
2277 try_get(caption_track, lambda x: x.get('name').get('simpleText')),
2278 {})
545cc85d 2279 continue
2280 automatic_captions = {}
2281 for translation_language in (pctr.get('translationLanguages') or []):
2282 translation_language_code = translation_language.get('languageCode')
2283 if not translation_language_code:
2284 continue
2285 process_language(
2286 automatic_captions, base_url, translation_language_code,
774d79cc 2287 try_get(translation_language, lambda x: x['languageName']['simpleText']),
545cc85d 2288 {'tlang': translation_language_code})
2289 info['automatic_captions'] = automatic_captions
2290 info['subtitles'] = subtitles
7e72694b 2291
545cc85d 2292 parsed_url = compat_urllib_parse_urlparse(url)
2293 for component in [parsed_url.fragment, parsed_url.query]:
2294 query = compat_parse_qs(component)
2295 for k, v in query.items():
2296 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2297 d_k += '_time'
2298 if d_k not in info and k in s_ks:
2299 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2300
2301 # Youtube Music Auto-generated description
822b9d9c 2302 if video_description:
38d70284 2303 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2304 if mobj:
822b9d9c
RA
2305 release_year = mobj.group('release_year')
2306 release_date = mobj.group('release_date')
2307 if release_date:
2308 release_date = release_date.replace('-', '')
2309 if not release_year:
545cc85d 2310 release_year = release_date[:4]
2311 info.update({
2312 'album': mobj.group('album'.strip()),
2313 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2314 'track': mobj.group('track').strip(),
2315 'release_date': release_date,
cc2db878 2316 'release_year': int_or_none(release_year),
545cc85d 2317 })
7e72694b 2318
545cc85d 2319 initial_data = None
2320 if webpage:
2321 initial_data = self._extract_yt_initial_variable(
2322 webpage, self._YT_INITIAL_DATA_RE, video_id,
2323 'yt initial data')
2324 if not initial_data:
2325 initial_data = self._call_api(
f4f751af 2326 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2327
2328 if not is_live:
2329 try:
2330 # This will error if there is no livechat
2331 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2332 info['subtitles']['live_chat'] = [{
394dcd44 2333 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2334 'video_id': video_id,
2335 'ext': 'json',
2336 'protocol': 'youtube_live_chat_replay',
2337 }]
2338 except (KeyError, IndexError, TypeError):
2339 pass
2340
2341 if initial_data:
2342 chapters = self._extract_chapters_from_json(
2343 initial_data, video_id, duration)
2344 if not chapters:
2345 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2346 contents = try_get(
2347 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2348 list)
2349 if not contents:
2350 continue
2351
2352 def chapter_time(mmlir):
2353 return parse_duration(
2354 get_text(mmlir.get('timeDescription')))
2355
2356 chapters = []
2357 for next_num, content in enumerate(contents, start=1):
2358 mmlir = content.get('macroMarkersListItemRenderer') or {}
2359 start_time = chapter_time(mmlir)
2360 end_time = chapter_time(try_get(
2361 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2362 if next_num < len(contents) else duration
2363 if start_time is None or end_time is None:
2364 continue
2365 chapters.append({
2366 'start_time': start_time,
2367 'end_time': end_time,
2368 'title': get_text(mmlir.get('title')),
2369 })
2370 if chapters:
2371 break
2372 if chapters:
2373 info['chapters'] = chapters
2374
2375 contents = try_get(
2376 initial_data,
2377 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2378 list) or []
2379 for content in contents:
2380 vpir = content.get('videoPrimaryInfoRenderer')
2381 if vpir:
2382 stl = vpir.get('superTitleLink')
2383 if stl:
2384 stl = get_text(stl)
2385 if try_get(
2386 vpir,
2387 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2388 info['location'] = stl
2389 else:
2390 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2391 if mobj:
2392 info.update({
2393 'series': mobj.group(1),
2394 'season_number': int(mobj.group(2)),
2395 'episode_number': int(mobj.group(3)),
2396 })
2397 for tlb in (try_get(
2398 vpir,
2399 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2400 list) or []):
2401 tbr = tlb.get('toggleButtonRenderer') or {}
2402 for getter, regex in [(
2403 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2404 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2405 lambda x: x['accessibility'],
2406 lambda x: x['accessibilityData']['accessibilityData'],
2407 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2408 label = (try_get(tbr, getter, dict) or {}).get('label')
2409 if label:
2410 mobj = re.match(regex, label)
2411 if mobj:
2412 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2413 break
2414 sbr_tooltip = try_get(
2415 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2416 if sbr_tooltip:
2417 like_count, dislike_count = sbr_tooltip.split(' / ')
2418 info.update({
2419 'like_count': str_to_int(like_count),
2420 'dislike_count': str_to_int(dislike_count),
2421 })
2422 vsir = content.get('videoSecondaryInfoRenderer')
2423 if vsir:
2424 info['channel'] = get_text(try_get(
2425 vsir,
2426 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2427 dict))
545cc85d 2428 rows = try_get(
2429 vsir,
2430 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2431 list) or []
2432 multiple_songs = False
2433 for row in rows:
2434 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2435 multiple_songs = True
2436 break
2437 for row in rows:
2438 mrr = row.get('metadataRowRenderer') or {}
2439 mrr_title = mrr.get('title')
2440 if not mrr_title:
2441 continue
2442 mrr_title = get_text(mrr['title'])
2443 mrr_contents_text = get_text(mrr['contents'][0])
2444 if mrr_title == 'License':
2445 info['license'] = mrr_contents_text
2446 elif not multiple_songs:
2447 if mrr_title == 'Album':
2448 info['album'] = mrr_contents_text
2449 elif mrr_title == 'Artist':
2450 info['artist'] = mrr_contents_text
2451 elif mrr_title == 'Song':
2452 info['track'] = mrr_contents_text
2453
2454 fallbacks = {
2455 'channel': 'uploader',
2456 'channel_id': 'uploader_id',
2457 'channel_url': 'uploader_url',
2458 }
2459 for to, frm in fallbacks.items():
2460 if not info.get(to):
2461 info[to] = info.get(frm)
2462
2463 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2464 v = info.get(s_k)
2465 if v:
2466 info[d_k] = v
b84071c0 2467
c224251a
M
2468 is_private = bool_or_none(video_details.get('isPrivate'))
2469 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2470 is_membersonly = None
b28f8d24 2471 is_premium = None
c224251a
M
2472 if initial_data and is_private is not None:
2473 is_membersonly = False
b28f8d24 2474 is_premium = False
c224251a
M
2475 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2476 for content in contents or []:
2477 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2478 for badge in badges or []:
2479 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2480 if label.lower() == 'members only':
2481 is_membersonly = True
2482 break
b28f8d24
M
2483 elif label.lower() == 'premium':
2484 is_premium = True
2485 break
2486 if is_membersonly or is_premium:
c224251a
M
2487 break
2488
2489 # TODO: Add this for playlists
2490 info['availability'] = self._availability(
2491 is_private=is_private,
b28f8d24 2492 needs_premium=is_premium,
c224251a
M
2493 needs_subscription=is_membersonly,
2494 needs_auth=info['age_limit'] >= 18,
2495 is_unlisted=None if is_private is None else is_unlisted)
2496
06167fbb 2497 # get xsrf for annotations or comments
a06916d9 2498 get_annotations = self.get_param('writeannotations', False)
2499 get_comments = self.get_param('getcomments', False)
06167fbb 2500 if get_annotations or get_comments:
29f7c58a 2501 xsrf_token = None
545cc85d 2502 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2503 if ytcfg:
2504 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2505 if not xsrf_token:
2506 xsrf_token = self._search_regex(
2507 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2508 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2509
2510 # annotations
06167fbb 2511 if get_annotations:
64b6a4e9
RA
2512 invideo_url = try_get(
2513 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2514 if xsrf_token and invideo_url:
29f7c58a 2515 xsrf_field_name = None
2516 if ytcfg:
2517 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2518 if not xsrf_field_name:
2519 xsrf_field_name = self._search_regex(
2520 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2521 webpage, 'xsrf field name',
29f7c58a 2522 group='xsrf_field_name', default='session_token')
8a784c74 2523 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2524 self._proto_relative_url(invideo_url),
2525 video_id, note='Downloading annotations',
2526 errnote='Unable to download video annotations', fatal=False,
2527 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2528
277d6ff5 2529 if get_comments:
a1c5d2ca 2530 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2531
545cc85d 2532 self.mark_watched(video_id, player_response)
d77ab8e2 2533
545cc85d 2534 return info
c5e8d7af 2535
5f6a1245 2536
8bdd16b4 2537class YoutubeTabIE(YoutubeBaseInfoExtractor):
2538 IE_DESC = 'YouTube.com tab'
70d5c17b 2539 _VALID_URL = r'''(?x)
2540 https?://
2541 (?:\w+\.)?
2542 (?:
2543 youtube(?:kids)?\.com|
2544 invidio\.us
2545 )/
2546 (?:
fe03a6cd 2547 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 2548 (?P<not_channel>
9ba5705a 2549 feed/|hashtag/|
70d5c17b 2550 (?:playlist|watch)\?.*?\blist=
2551 )|
29f7c58a 2552 (?!(?:%s)\b) # Direct URLs
70d5c17b 2553 )
2554 (?P<id>[^/?\#&]+)
2555 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2556 IE_NAME = 'youtube:tab'
2557
81127aa5 2558 _TESTS = [{
da692b79 2559 'note': 'playlists, multipage',
8bdd16b4 2560 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2561 'playlist_mincount': 94,
2562 'info_dict': {
2563 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2564 'title': 'Игорь Клейнер - Playlists',
2565 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2566 'uploader': 'Игорь Клейнер',
2567 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2568 },
2569 }, {
da692b79 2570 'note': 'playlists, multipage, different order',
8bdd16b4 2571 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2572 'playlist_mincount': 94,
2573 'info_dict': {
2574 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2575 'title': 'Игорь Клейнер - Playlists',
2576 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2577 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2578 'uploader': 'Игорь Клейнер',
8bdd16b4 2579 },
201c1459 2580 }, {
da692b79 2581 'note': 'playlists, series',
201c1459 2582 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2583 'playlist_mincount': 5,
2584 'info_dict': {
2585 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2586 'title': '3Blue1Brown - Playlists',
2587 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 2588 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2589 'uploader': '3Blue1Brown',
201c1459 2590 },
8bdd16b4 2591 }, {
da692b79 2592 'note': 'playlists, singlepage',
8bdd16b4 2593 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2594 'playlist_mincount': 4,
2595 'info_dict': {
2596 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2597 'title': 'ThirstForScience - Playlists',
2598 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2599 'uploader': 'ThirstForScience',
2600 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2601 }
2602 }, {
2603 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2604 'only_matching': True,
2605 }, {
da692b79 2606 'note': 'basic, single video playlist',
0e30a7b9 2607 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2608 'info_dict': {
0e30a7b9 2609 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2610 'uploader': 'Sergey M.',
2611 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2612 'title': 'youtube-dl public playlist',
81127aa5 2613 },
0e30a7b9 2614 'playlist_count': 1,
9291475f 2615 }, {
da692b79 2616 'note': 'empty playlist',
0e30a7b9 2617 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2618 'info_dict': {
0e30a7b9 2619 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2620 'uploader': 'Sergey M.',
2621 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2622 'title': 'youtube-dl empty playlist',
9291475f
PH
2623 },
2624 'playlist_count': 0,
2625 }, {
da692b79 2626 'note': 'Home tab',
8bdd16b4 2627 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2628 'info_dict': {
8bdd16b4 2629 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2630 'title': 'lex will - Home',
2631 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2632 'uploader': 'lex will',
2633 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2634 },
8bdd16b4 2635 'playlist_mincount': 2,
9291475f 2636 }, {
da692b79 2637 'note': 'Videos tab',
8bdd16b4 2638 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2639 'info_dict': {
8bdd16b4 2640 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2641 'title': 'lex will - Videos',
2642 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2643 'uploader': 'lex will',
2644 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2645 },
8bdd16b4 2646 'playlist_mincount': 975,
9291475f 2647 }, {
da692b79 2648 'note': 'Videos tab, sorted by popular',
8bdd16b4 2649 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2650 'info_dict': {
8bdd16b4 2651 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2652 'title': 'lex will - Videos',
2653 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2654 'uploader': 'lex will',
2655 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2656 },
8bdd16b4 2657 'playlist_mincount': 199,
9291475f 2658 }, {
da692b79 2659 'note': 'Playlists tab',
8bdd16b4 2660 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2661 'info_dict': {
8bdd16b4 2662 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2663 'title': 'lex will - Playlists',
2664 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2665 'uploader': 'lex will',
2666 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2667 },
8bdd16b4 2668 'playlist_mincount': 17,
ac7553d0 2669 }, {
da692b79 2670 'note': 'Community tab',
8bdd16b4 2671 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2672 'info_dict': {
8bdd16b4 2673 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2674 'title': 'lex will - Community',
2675 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2676 'uploader': 'lex will',
2677 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2678 },
2679 'playlist_mincount': 18,
87dadd45 2680 }, {
da692b79 2681 'note': 'Channels tab',
8bdd16b4 2682 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2683 'info_dict': {
8bdd16b4 2684 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2685 'title': 'lex will - Channels',
2686 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2687 'uploader': 'lex will',
2688 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2689 },
deaec5af 2690 'playlist_mincount': 12,
cd684175 2691 }, {
2692 'note': 'Search tab',
2693 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
2694 'playlist_mincount': 40,
2695 'info_dict': {
2696 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2697 'title': '3Blue1Brown - Search - linear algebra',
2698 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2699 'uploader': '3Blue1Brown',
2700 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2701 },
6b08cdf6 2702 }, {
a0566bbf 2703 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2704 'only_matching': True,
2705 }, {
a0566bbf 2706 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2707 'only_matching': True,
2708 }, {
a0566bbf 2709 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2710 'only_matching': True,
2711 }, {
2712 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2713 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2714 'info_dict': {
2715 'title': '29C3: Not my department',
2716 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2717 'uploader': 'Christiaan008',
2718 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2719 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2720 },
2721 'playlist_count': 96,
2722 }, {
2723 'note': 'Large playlist',
2724 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2725 'info_dict': {
8bdd16b4 2726 'title': 'Uploads from Cauchemar',
2727 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2728 'uploader': 'Cauchemar',
2729 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2730 },
8bdd16b4 2731 'playlist_mincount': 1123,
2732 }, {
da692b79 2733 'note': 'even larger playlist, 8832 videos',
8bdd16b4 2734 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2735 'only_matching': True,
4b7df0d3
JMF
2736 }, {
2737 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2738 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2739 'info_dict': {
acf757f4
PH
2740 'title': 'Uploads from Interstellar Movie',
2741 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2742 'uploader': 'Interstellar Movie',
8bdd16b4 2743 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2744 },
481cc733 2745 'playlist_mincount': 21,
358de58c 2746 }, {
2747 'note': 'Playlist with "show unavailable videos" button',
2748 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2749 'info_dict': {
2750 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2751 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2752 'uploader': 'Phim Siêu Nhân Nhật Bản',
2753 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2754 },
da692b79 2755 'playlist_mincount': 200,
5d342002 2756 }, {
da692b79 2757 'note': 'Playlist with unavailable videos in page 7',
5d342002 2758 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2759 'info_dict': {
2760 'title': 'Uploads from BlankTV',
2761 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2762 'uploader': 'BlankTV',
2763 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2764 },
da692b79 2765 'playlist_mincount': 1000,
8bdd16b4 2766 }, {
da692b79 2767 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 2768 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2769 'info_dict': {
2770 'title': 'Data Analysis with Dr Mike Pound',
2771 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2772 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2773 'uploader': 'Computerphile',
deaec5af 2774 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2775 },
2776 'playlist_mincount': 11,
2777 }, {
a0566bbf 2778 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2779 'only_matching': True,
dacb3a86 2780 }, {
da692b79 2781 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
2782 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2783 'info_dict': {
2784 'id': 'FqZTN594JQw',
2785 'ext': 'webm',
2786 'title': "Smiley's People 01 detective, Adventure Series, Action",
2787 'uploader': 'STREEM',
2788 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2789 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2790 'upload_date': '20150526',
2791 'license': 'Standard YouTube License',
2792 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2793 'categories': ['People & Blogs'],
2794 'tags': list,
dbdaaa23 2795 'view_count': int,
dacb3a86
S
2796 'like_count': int,
2797 'dislike_count': int,
2798 },
2799 'params': {
2800 'skip_download': True,
2801 },
13a75688 2802 'skip': 'This video is not available.',
dacb3a86 2803 'add_ie': [YoutubeIE.ie_key()],
481cc733 2804 }, {
8bdd16b4 2805 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2806 'only_matching': True,
66b48727 2807 }, {
8bdd16b4 2808 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2809 'only_matching': True,
a0566bbf 2810 }, {
2811 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2812 'info_dict': {
da692b79 2813 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 2814 'ext': 'mp4',
deaec5af 2815 'title': compat_str,
a0566bbf 2816 'uploader': 'Sky News',
2817 'uploader_id': 'skynews',
2818 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 2819 'upload_date': r're:\d{8}',
2820 'description': compat_str,
a0566bbf 2821 'categories': ['News & Politics'],
2822 'tags': list,
2823 'like_count': int,
2824 'dislike_count': int,
2825 },
2826 'params': {
2827 'skip_download': True,
2828 },
da692b79 2829 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 2830 }, {
2831 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2832 'info_dict': {
2833 'id': 'a48o2S1cPoo',
2834 'ext': 'mp4',
2835 'title': 'The Young Turks - Live Main Show',
2836 'uploader': 'The Young Turks',
2837 'uploader_id': 'TheYoungTurks',
2838 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2839 'upload_date': '20150715',
2840 'license': 'Standard YouTube License',
2841 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2842 'categories': ['News & Politics'],
2843 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2844 'like_count': int,
2845 'dislike_count': int,
2846 },
2847 'params': {
2848 'skip_download': True,
2849 },
2850 'only_matching': True,
2851 }, {
2852 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2853 'only_matching': True,
2854 }, {
2855 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2856 'only_matching': True,
3d3dddc9 2857 }, {
2858 'url': 'https://www.youtube.com/feed/trending',
2859 'only_matching': True,
2860 }, {
3d3dddc9 2861 'url': 'https://www.youtube.com/feed/library',
2862 'only_matching': True,
2863 }, {
3d3dddc9 2864 'url': 'https://www.youtube.com/feed/history',
2865 'only_matching': True,
2866 }, {
3d3dddc9 2867 'url': 'https://www.youtube.com/feed/subscriptions',
2868 'only_matching': True,
2869 }, {
3d3dddc9 2870 'url': 'https://www.youtube.com/feed/watch_later',
2871 'only_matching': True,
2872 }, {
da692b79 2873 'note': 'Recommended - redirects to home page',
3d3dddc9 2874 'url': 'https://www.youtube.com/feed/recommended',
2875 'only_matching': True,
29f7c58a 2876 }, {
da692b79 2877 'note': 'inline playlist with not always working continuations',
29f7c58a 2878 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2879 'only_matching': True,
2880 }, {
2881 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2882 'only_matching': True,
2883 }, {
2884 'url': 'https://www.youtube.com/course',
2885 'only_matching': True,
2886 }, {
2887 'url': 'https://www.youtube.com/zsecurity',
2888 'only_matching': True,
2889 }, {
2890 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2891 'only_matching': True,
2892 }, {
2893 'url': 'https://www.youtube.com/TheYoungTurks/live',
2894 'only_matching': True,
39ed931e 2895 }, {
2896 'url': 'https://www.youtube.com/hashtag/cctv9',
2897 'info_dict': {
2898 'id': 'cctv9',
2899 'title': '#cctv9',
2900 },
2901 'playlist_mincount': 350,
201c1459 2902 }, {
2903 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2904 'only_matching': True,
9297939e 2905 }, {
da692b79 2906 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 2907 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2908 'only_matching': True
fe03a6cd 2909 }, {
2910 'note': '/browse/ should redirect to /channel/',
2911 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
2912 'only_matching': True
2913 }, {
2914 'note': 'VLPL, should redirect to playlist?list=PL...',
2915 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2916 'info_dict': {
2917 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2918 'uploader': 'NoCopyrightSounds',
2919 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
2920 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
2921 'title': 'NCS Releases',
2922 },
2923 'playlist_mincount': 166,
18db7548 2924 }, {
2925 'note': 'Topic, should redirect to playlist?list=UU...',
2926 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
2927 'info_dict': {
2928 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
2929 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
2930 'title': 'Uploads from Royalty Free Music - Topic',
2931 'uploader': 'Royalty Free Music - Topic',
2932 },
2933 'expected_warnings': [
2934 'A channel/user page was given',
2935 'The URL does not have a videos tab',
2936 ],
2937 'playlist_mincount': 101,
2938 }, {
2939 'note': 'Topic without a UU playlist',
2940 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
2941 'info_dict': {
2942 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
2943 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
2944 },
2945 'expected_warnings': [
2946 'A channel/user page was given',
2947 'The URL does not have a videos tab',
2948 'Falling back to channel URL',
2949 ],
2950 'playlist_mincount': 9,
abcdd12b 2951 }, {
2952 'note': 'Youtube music Album',
2953 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
2954 'info_dict': {
2955 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
2956 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
2957 },
2958 'playlist_count': 50,
29f7c58a 2959 }]
2960
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should be used for *url*.

    Any URL that YoutubeIE reports as suitable is declined here;
    every other URL is decided by the inherited suitable() check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2965
def _extract_channel_id(self, webpage):
    """Return the channel ID advertised by *webpage*.

    The 'channelId' meta tag is tried first.  If it is absent, the
    channel URL is read from one of several URL-carrying meta tags and
    the ID is taken from its ``/channel/<id>`` path component.

    How a missing meta tag or a non-matching URL is reported is decided
    by self._html_search_meta / self._search_regex.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the group: a repeated group such as
    # ``([^/?#&])+`` only retains its final repetition, which would yield
    # just the last character of the ID.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 2978
8bdd16b4 2979 @staticmethod
cd7c66cf 2980 def _extract_basic_item_renderer(item):
2981 # Modified from _extract_grid_item_renderer
201c1459 2982 known_basic_renderers = (
2983 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 2984 )
2985 for key, renderer in item.items():
201c1459 2986 if not isinstance(renderer, dict):
cd7c66cf 2987 continue
201c1459 2988 elif key in known_basic_renderers:
2989 return renderer
2990 elif key.startswith('grid') and key.endswith('Renderer'):
2991 return renderer
8bdd16b4 2992
8bdd16b4 2993 def _grid_entries(self, grid_renderer):
2994 for item in grid_renderer['items']:
2995 if not isinstance(item, dict):
39b62db1 2996 continue
cd7c66cf 2997 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 2998 if not isinstance(renderer, dict):
2999 continue
3000 title = try_get(
201c1459 3001 renderer, (lambda x: x['title']['runs'][0]['text'],
3002 lambda x: x['title']['simpleText']), compat_str)
8bdd16b4 3003 # playlist
3004 playlist_id = renderer.get('playlistId')
3005 if playlist_id:
3006 yield self.url_result(
3007 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3008 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3009 video_title=title)
201c1459 3010 continue
8bdd16b4 3011 # video
3012 video_id = renderer.get('videoId')
3013 if video_id:
3014 yield self._extract_video(renderer)
201c1459 3015 continue
8bdd16b4 3016 # channel
3017 channel_id = renderer.get('channelId')
3018 if channel_id:
3019 title = try_get(
3020 renderer, lambda x: x['title']['simpleText'], compat_str)
3021 yield self.url_result(
3022 'https://www.youtube.com/channel/%s' % channel_id,
3023 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3024 continue
3025 # generic endpoint URL support
3026 ep_url = urljoin('https://www.youtube.com/', try_get(
3027 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3028 compat_str))
3029 if ep_url:
3030 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3031 if ie.suitable(ep_url):
3032 yield self.url_result(
3033 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3034 break
8bdd16b4 3035
3d3dddc9 3036 def _shelf_entries_from_content(self, shelf_renderer):
3037 content = shelf_renderer.get('content')
3038 if not isinstance(content, dict):
8bdd16b4 3039 return
cd7c66cf 3040 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3041 if renderer:
3042 # TODO: add support for nested playlists so each shelf is processed
3043 # as separate playlist
3044 # TODO: this includes only first N items
3045 for entry in self._grid_entries(renderer):
3046 yield entry
3047 renderer = content.get('horizontalListRenderer')
3048 if renderer:
3049 # TODO
3050 pass
8bdd16b4 3051
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf.

    When the shelf has an endpoint URL, a url_result for that URL is
    yielded first, titled with the first text run of the shelf title.
    Entries found in the shelf content are then yielded as well.

    With *skip_channels* set, a shelf whose URL contains '/channels?'
    produces nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are recognised by their URL; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType against
        # WEB_PAGE_TYPE_CHANNEL will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may carry no URL, so its inline content is always walked too
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 3069
8bdd16b4 3070 def _playlist_entries(self, video_list_renderer):
3071 for content in video_list_renderer['contents']:
3072 if not isinstance(content, dict):
3073 continue
3074 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3075 if not isinstance(renderer, dict):
3076 continue
3077 video_id = renderer.get('videoId')
3078 if not video_id:
3079 continue
3080 yield self._extract_video(renderer)
07aeced6 3081
def _rich_entries(self, rich_grid_renderer):
    """Yield the video under ``['content']['videoRenderer']``, if any.

    Nothing is yielded when that renderer is missing or has no
    'videoId'.
    """
    video = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if not video or not video.get('videoId'):
        return
    yield self._extract_video(video)
3089
8bdd16b4 3090 def _video_entry(self, video_renderer):
3091 video_id = video_renderer.get('videoId')
3092 if video_id:
3093 return self._extract_video(video_renderer)
dacb3a86 3094
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a single post.

    The post is read from ``['post']['backstagePostRenderer']``.  In
    order, this yields: the attached video (if any), the attached
    playlist (if any), and every link in the post text that YoutubeIE
    accepts, except a link to the video already yielded as attachment.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # Attached video
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_id = attached_video.get('videoId')
    if attached_id:
        attached_entry = self._extract_video(attached_video)
        if attached_entry:
            yield attached_entry

    # Attached playlist
    attached_list_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'],
        compat_str)
    if attached_list_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_list_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_list_id)

    # Video links inside the post text
    text_runs = try_get(post, lambda x: x['contentText']['runs'], list) or []
    for text_run in text_runs:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'],
            compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        if attached_id == linked_id:
            # Same video as the attachment; do not emit it twice
            continue
        yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3130
8bdd16b4 3131 def _post_thread_continuation_entries(self, post_thread_continuation):
3132 contents = post_thread_continuation.get('contents')
3133 if not isinstance(contents, list):
3134 return
3135 for content in contents:
3136 renderer = content.get('backstagePostThreadRenderer')
3137 if not isinstance(renderer, dict):
3138 continue
3139 for entry in self._post_thread_entries(renderer):
3140 yield entry
07aeced6 3141
39ed931e 3142 r''' # unused
3143 def _rich_grid_entries(self, contents):
3144 for content in contents:
3145 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3146 if video_renderer:
3147 entry = self._video_entry(video_renderer)
3148 if entry:
3149 yield entry
3150 '''
3151
29f7c58a 3152 @staticmethod
3153 def _build_continuation_query(continuation, ctp=None):
3154 query = {
3155 'ctoken': continuation,
3156 'continuation': continuation,
3157 }
3158 if ctp:
3159 query['itct'] = ctp
3160 return query
3161
@staticmethod
def _extract_next_continuation_data(renderer):
    """Build a continuation query from renderer['continuations'][0].

    Returns None when there is no 'nextContinuationData' dict or it
    carries no 'continuation' token.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = data.get('continuation') if data else None
    if not token:
        return
    return YoutubeTabIE._build_continuation_query(
        token, data.get('clickTrackingParams'))
c5e8d7af 3173
@classmethod
def _extract_continuation(cls, renderer):
    """Find the continuation query for renderer, if it has one.

    'nextContinuationData' is tried first. Otherwise the renderer's
    'contents' and 'items' lists are scanned (in that order) for the
    first 'continuationItemRenderer' carrying a continuation token.
    Returns None when nothing is found.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
448830ce 3196
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield all entries of a tab, following continuations page by page.

    tab is the selected tab renderer; its 'content' must hold either a
    'sectionListRenderer' or a 'richGridRenderer'. item_id is only used
    to label the continuation requests. identity_token, account_syncid
    and ytcfg are forwarded to the header/context helpers.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first recognized renderer of each item is used
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list used as a mutable cell that extract_entries()
    # writes to, because Python 2 does not support nonlocal
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # _build_continuation_query() only sets 'itct' when click tracking
        # params were available, so it must not be indexed unconditionally
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep using the previous visitor data if the response has none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh cell so that a stale continuation is never reused
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item decides how the whole list is parsed
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the items so that they look like a regular renderer
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3316
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab whose 'selected' is True.

    Both 'tabRenderer' and 'expandableTabRenderer' are considered.
    Raises ExtractorError if no tab is selected.
    """
    for tab in tabs:
        candidate = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if candidate.get('selected') is True:
            return candidate
    raise ExtractorError('Unable to find selected tab')
b82f815f 3325
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar.

    Returns a dict with any of 'uploader', 'uploader_id' and
    'uploader_url' that could be determined; fields whose value is None
    are left out.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or []:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        info['uploader'] = owner.get('text')
        info['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        info['uploader_url'] = urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
    return dict((field, value) for field, value in info.items() if value is not None)
8bdd16b4 3348
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tabbed (channel/playlist) page.

    Metadata comes from 'channelMetadataRenderer' when present, else
    from 'playlistMetadataRenderer'; uploader details fall back to the
    sidebar when the page is not a channel. The entries are produced
    lazily by self._entries() for the selected tab.
    """
    channel_name = channel_url = channel_id = None
    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []

    selected_tab = self._extract_selected_tab(tabs)
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if not renderer:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
    else:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Stays None for plain playlists; replaced by item_id below
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for raw_thumbnail in raw_thumbnails:
        if not isinstance(raw_thumbnail, dict):
            continue
        thumbnail_url = url_or_none(raw_thumbnail.get('url'))
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(raw_thumbnail.get('width')),
                'height': int_or_none(raw_thumbnail.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    # Append the name of the selected tab (and sub-tab) to the title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly updated) uploader fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id,
        self._extract_identity_token(webpage, item_id),
        self._extract_account_syncid(data),
        self._extract_ytcfg(item_id, webpage))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3421
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a Mix playlist, requesting further pages on demand.

    playlist is the initial playlist renderer. Each further page is
    requested from the 'next' endpoint, starting from the last video of
    the current page. Iteration ends when a page has no new videos, when
    the first video shows up again, or when a response has no playlist.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item is not guaranteed to be a playlistPanelVideoRenderer;
        # fall back to an empty dict so that the defaults below apply
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Nothing left to parse; do not hand None to _playlist_entries()
            return
cd7c66cf 3460
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a playlist embedded in a watch page.

    When the playlist advertises its own URL (different from url), the
    extraction is delegated to that URL; otherwise the playlist is
    treated as a Mix and extracted page by page.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3478
@staticmethod
def _extract_alerts(data):
    """Yield (alert_type, message) for every alert found in data['alerts'].

    The message is the alert's 'simpleText' when available, otherwise
    the concatenation of the text of all its 'runs'. Alerts without a
    type or without any text are skipped, and each alert is yielded at
    most once.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if not message:
                # A run without text contributes nothing rather than
                # breaking the concatenation with a None
                message = ''.join(
                    try_get(run, lambda x: x['text'], compat_str) or ''
                    for run in try_get(alert, lambda x: x['text']['runs'], list) or [])
            if message:
                yield alert_type, message
3495
3496 def _report_alerts(self, alerts, expected=True):
3ffc7c89 3497 errors = []
3498 warnings = []
95c01b6c 3499 for alert_type, alert_message in alerts:
f3eaa8dd 3500 if alert_type.lower() == 'error':
3ffc7c89 3501 errors.append([alert_type, alert_message])
f3eaa8dd 3502 else:
3ffc7c89 3503 warnings.append([alert_type, alert_message])
f3eaa8dd 3504
3ffc7c89 3505 for alert_type, alert_message in (warnings + errors[:-1]):
6a39ee13 3506 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
3ffc7c89 3507 if errors:
3508 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3509
95c01b6c 3510 def _extract_and_report_alerts(self, data, *args, **kwargs):
3511 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
3512
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when data has no playlist sidebar. Otherwise the
    browse endpoint of the button is used when it is found, and default
    values derived from item_id when it is not. The request is
    non-fatal.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return

    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        primary_info = sidebar_item.get('playlistSidebarPrimaryInfoRenderer')
        menu_items = try_get(
            primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if not isinstance(menu_item, dict):
                continue
            nav_item = menu_item.get('menuNavigationItemRenderer')
            label = try_get(
                nav_item, lambda x: x['text']['simpleText'], compat_str)
            if label and label.lower() == 'show unavailable videos':
                endpoint = try_get(
                    nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
                browse_id = endpoint.get('browseId')
                params = endpoint.get('params')
                # Only leaves the menu loop; later sidebar items are still scanned
                break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    account_syncid = self._extract_account_syncid(ytcfg)
    identity_token = self._extract_identity_token(webpage, item_id=item_id)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=account_syncid,
        identity_token=identity_token, visitor_data=visitor_data)
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3556
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint ep, retrying on transient failures.

    A request is retried (up to the 'extractor_retries' param, default 3)
    on HTTP 500/503/404 and when the response contains none of
    check_get_keys. Once the retries are used up, or on any other
    ExtractorError, the error is raised if fatal; otherwise a warning
    is reported and None is returned.
    """
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []

    response = last_error = None
    attempt = -1
    while attempt < retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, ' (retry #%d)' % attempt if attempt else ''))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if attempt < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        # Only reached when the request itself succeeded.
        # Youtube may send alerts if there was an issue with the continuation page
        self._extract_and_report_alerts(response, expected=False)
        if not check_get_keys or dict_get(response, check_get_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if attempt < retries:
            continue
        if fatal:
            raise ExtractorError(last_error)
        self.report_warning(last_error)
        return
    return response
3604
cd7c66cf 3605 def _extract_webpage(self, url, item_id):
a06916d9 3606 retries = self.get_param('extractor_retries', 3)
62bff2c1 3607 count = -1
c705177d 3608 last_error = 'Incomplete yt initial data recieved'
14fdfea9 3609 while count < retries:
62bff2c1 3610 count += 1
14fdfea9 3611 # Sometimes youtube returns a webpage with incomplete ytInitialData
62bff2c1 3612 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3613 if count:
c705177d 3614 self.report_warning('%s. Retrying ...' % last_error)
5ef7d9bd 3615 webpage = self._download_webpage(
3616 url, item_id,
cd7c66cf 3617 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
14fdfea9 3618 data = self._extract_yt_initial_data(item_id, webpage)
14fdfea9 3619 if data.get('contents') or data.get('currentVideoEndpoint'):
3620 break
95c01b6c 3621 # Extract alerts here only when there is error
3622 self._extract_and_report_alerts(data)
c705177d 3623 if count >= retries:
6a39ee13 3624 raise ExtractorError(last_error)
cd7c66cf 3625 return webpage, data
3626
9297939e 3627 @staticmethod
3628 def _smuggle_data(entries, data):
3629 for entry in entries:
3630 if data:
3631 entry['url'] = smuggle_url(entry['url'], data)
3632 yield entry
3633
def _real_extract(self, url):
    """Extract url and re-attach the smuggled data to every entry.

    URLs recognized by self.is_music_url() additionally get
    'is_music_url' set in the smuggled data.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
3642
fe03a6cd 3643 _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
3644
def __real_extract(self, url, smuggled_data):
    """Resolve url to a tab page, a playlist or a single video.

    The URL is first normalized (host forced to www.youtube.com, music
    channels mapped to playlists, bare channel URLs redirected to
    /videos unless disabled through compat_opts). The page is then
    downloaded and dispatched on the structure of its initial data.
    Raises ExtractorError when the page cannot be recognized.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match groups of the URL, with unmatched groups as '' instead of None
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if (mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]
                and 'no-youtube-channel-redirect' not in compat_opts):
            if not mobj['not_channel'] and item_id[:2] == 'UC':
                # Topic channels don't have /videos. Use the equivalent playlist instead
                self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                pl_id = 'UU%s' % item_id[2:]
                pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                try:
                    pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                    for alert_type, alert_message in self._extract_alerts(pl_data):
                        # Compare case-insensitively, like _report_alerts()
                        # does, so that 'ERROR' alerts trigger the fallback
                        if alert_type.lower() == 'error':
                            raise ExtractorError('Youtube said: %s' % alert_message)
                    item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                except ExtractorError:
                    self.report_warning('The playlist gave error. Falling back to channel URL')
            else:
                self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    # data may have been replaced above, so look the tabs up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3756
c5e8d7af 3757
class YoutubePlaylistIE(InfoExtractor):
    """Match playlist URLs and bare playlist ids and hand them to YoutubeTabIE."""

    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE handles or that carry a video id."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite url to the canonical /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = self.is_music_url(url)
        # Keep the original query if there is one; a bare id has none
        query = parse_qs(url) or {'list': playlist_id}
        target_url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            target_url = smuggle_url(target_url, {'is_music_url': True})
        return self.url_result(target_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 3840
3841
class YoutubeYtBeIE(InfoExtractor):
    """Expand youtu.be short links that carry a playlist id."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rebuild the equivalent watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3880
3881
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the "ytuser:<name>" shortcut to the user's page."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /user/ URL of the given name."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3895
b05654f0 3896
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ":ytfav" shortcut to the liked-videos playlist (list=LL)."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; url itself carries no information."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
3914
3915
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Extractor for "ytsearch" keyword queries."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _SEARCH_KEY = 'ytsearch'
    # When set, sent as the "params" field of the search request.
    _SEARCH_PARAMS = None
    # No real upper bound seems to exist: searching for 'python', for
    # example, reports more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _TESTS = []

    def _entries(self, query, n):
        """Lazily yield up to ``n`` video results for ``query``.

        Pages are requested one after another from the "search" endpoint.
        Iteration stops once ``n`` videos were yielded, when a response is
        empty or carries no usable result list, or when a page offers no
        continuation token.
        """
        # Accessors handed to try_get; any lookup failure simply yields None.
        def first_page_sections(resp):
            return resp['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents']

        def continuation_sections(resp):
            return resp['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']

        def next_page_token(section):
            return section['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token']

        def section_items(section):
            return section['itemSectionRenderer']['contents']

        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS

        yielded = 0
        for page_no in itertools.count(1):
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_no),
                ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands'))
            if not response:
                return

            sections = try_get(
                response, (first_page_sections, continuation_sections), list)
            if not sections:
                return

            # Promoted content may be mixed into the results, which shifts
            # where the videos and the continuation token sit. Hence every
            # section is inspected instead of relying on fixed indices.
            token = None
            for section in sections:
                if token is None:
                    token = try_get(section, next_page_token, compat_str)

                items = try_get(section, section_items, list)
                if not items:
                    continue
                for item in items:
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    # Skip anything that is not a video entry with an ID
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not token:
                return
            request['continuation'] = token

    def _get_n_results(self, query, n):
        """Return a playlist result with up to ``n`` entries for ``query``."""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3985
c9ae7b95 3986
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE bound to the "ytsearchdate" keyword."""
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the "params" field of the search request; presumably the
    # encoded "sort by upload date" filter -- TODO confirm
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3992
c9ae7b95 3993
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Extractor for youtube.com/results search page URLs."""
    IE_NAME = '%s_url' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own ``_VALID_URL`` pattern unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by the URL's query string.

        The search terms come from ``search_query`` (or ``q``); the ``sp``
        value, when present, is stored as ``_SEARCH_PARAMS`` on the
        instance before the results are fetched.
        """
        url_params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = url_params.get('search_query') or url_params.get('q')
        query = terms[0]
        sp_values = url_params.get('sp', ('',))
        self._SEARCH_PARAMS = sp_values[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4019
4020
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    Every subclass has to provide a _FEED_NAME attribute.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name built from the subclass's feed name."""
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Delegate the feed's youtube.com/feed/ URL to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4037
4038
class YoutubeWatchLaterIE(InfoExtractor):
    """Shortcut extractor mapping ":ytwatchlater" onto the "WL" playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {'url': ':ytwatchlater', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Hand the fixed watch-later playlist URL over to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4051
4052
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "recommended" feed."""
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Matches the bare youtube.com front page as well as the keyword forms
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]
1ed5b5c9 4067
1ed5b5c9 4068
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "subscriptions" feed."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]
1ed5b5c9 4080
1ed5b5c9 4081
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "history" feed."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
1ed5b5c9
JMF
4090
4091
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their video ID and explain why.

    The pattern only matches URLs that end right after the host path, after
    a single non-video query parameter, or after an attribution link; such
    URLs typically result from an unquoted "&" on the command line.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with a hint on how to quote the URL.

        Raises:
            ExtractorError: unconditionally, flagged as expected so it is
                reported as a usage problem rather than a bug.
        """
        # The example commands name this project's executable (yt-dlp), so
        # the hint can be copied and run as shown.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4139
4140
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video ID is only 1 to 10 characters long."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always fail, reporting the incomplete ID and the offending URL.

        Raises:
            ExtractorError: unconditionally, flagged as expected.
        """
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)