jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube] Better message when login required
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Return the query-string parameters of *url* as parsed by compat_urlparse.parse_qs."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

        Note that a non-empty username currently short-circuits with a warning
        and a False result, because username+password login is broken.
        """

        def warn(message):
            self.report_warning(message)

        # username+password login is broken
        if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')
        username, password = self._get_login_info()
        if username:
            warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
            return False
        # Everything below this is broken!

        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the hidden login form plus the JSON-encoded f_req to url; False on failure."""
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything before the first '[' so the remainder parses as JSON
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        # Shared by the lookup and the challenge request payloads
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # The conditional must be parenthesised: '%' binds tighter than 'if/else'
            warn('Unable to login: %s' % (
                'Invalid password' if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as the login message
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code' if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _initialize_consent(self):
        """Set a 'YES' CONSENT cookie for .youtube.com unless a session or consent cookie already exists."""
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            # Reuse the numeric id of a pending consent cookie when there is one
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Prepare the consent cookie and, when a downloader is attached, attempt to log in."""
        self._initialize_consent()
        if self._downloader is None:
            return
        if not self._login():
            return

    _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
    _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """
        Build the value 'SAPISIDHASH <time>_<sha1>' from the SAPISID cookie.
        None is returned if there is no SAPISID cookie for https://www.youtube.com.
        """
        sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        # SHA-1 over "<time> <SAPISID> <origin>"
        hash_input = str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com"
        sapisidhash = hashlib.sha1(hash_input.encode("utf-8")).hexdigest()
        return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None):
        """
        Send query, merged with the client context, as a JSON body to
        https://www.youtube.com/youtubei/v1/<ep> and return the parsed JSON.

        @param context  Context to send; falls back to _extract_context() if falsy
        @param api_key  Key to send; falls back to _extract_api_key() if falsy
        @param headers  Extra headers applied on top of the generated ones
        """

        data = {'context': context} if context else {'context': self._extract_context()}
        data.update(query)
        real_headers = self._generate_api_headers()
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def _extract_api_key(self, ytcfg=None):
        """Return INNERTUBE_API_KEY from ytcfg, or the built-in default key."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY

    def _extract_yt_initial_data(self, video_id, webpage):
        """Locate the ytInitialData object in webpage and return it parsed as JSON."""
        return self._parse_json(
            self._search_regex(
                # Try the pattern anchored by a trailing boundary first, then the bare one
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return ID_TOKEN from the page's ytcfg, else from a regex over webpage, else None."""
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(data):
        """
        Extract syncId required to download private playlists of secondary channels
        @param data Either response or ytcfg
        """
        sync_ids = (try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]
        # ytcfg includes channel_syncid if on secondary channel.
        # try_get (rather than data.get) so that data=None yields None instead of raising
        return try_get(data, lambda x: x['DELEGATED_SESSION_ID'])

    def _extract_ytcfg(self, video_id, webpage):
        """Return the dict passed to ytcfg.set(...) in webpage, or {} if missing or unparsable."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def __extract_client_version(self, ytcfg):
        """Return INNERTUBE_CLIENT_VERSION from ytcfg, or the built-in default version."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION

    def _extract_context(self, ytcfg=None):
        """Return INNERTUBE_CONTEXT from ytcfg, or rebuild a minimal client context."""
        context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
        if context:
            return context

        # Recreate the client context (required)
        client_version = self.__extract_client_version(ytcfg)
        client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
        context = {
            'client': {
                'clientName': client_name,
                'clientVersion': client_version,
            }
        }
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
        """
        Build the request headers for API calls.
        Client name/version are always set; the identity token, account sync id,
        visitor data and SAPISIDHASH authorization are added only when available.
        """
        headers = {
            'X-YouTube-Client-Name': '1',
            'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
            headers['X-Goog-AuthUser'] = 0
        if visitor_data:
            headers['x-goog-visitor-id'] = visitor_data
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = 'https://www.youtube.com'
        return headers

    @staticmethod
    def is_music_url(url):
        """Return True if url starts with http(s)://music.youtube.com/"""
        return re.match(r'https?://music\.youtube\.com/', url) is not None

    def _extract_video(self, renderer):
        """Convert a video renderer dict into a 'url' result handled by YoutubeIE."""
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is stripped first; only a leading run of digits/commas is used
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
446
0c148415 447
360e1ca5 448class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 449 IE_DESC = 'YouTube.com'
bc2ca1bb 450 _INVIDIOUS_SITES = (
451 # invidious-redirect websites
452 r'(?:www\.)?redirect\.invidious\.io',
453 r'(?:(?:www|dev)\.)?invidio\.us',
454 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
455 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 456 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 457 r'(?:(?:www|au)\.)?ytprivate\.com',
458 r'(?:www\.)?invidious\.namazso\.eu',
459 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 460 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
461 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
462 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
463 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
464 # youtube-dl invidious instances list
465 r'(?:(?:www|no)\.)?invidiou\.sh',
466 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
467 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 468 r'(?:www\.)?invidious\.mastodon\.host',
469 r'(?:www\.)?invidious\.zapashcanon\.fr',
470 r'(?:www\.)?invidious\.kavin\.rocks',
201c1459 471 r'(?:www\.)?invidious\.tinfoil-hat\.net',
472 r'(?:www\.)?invidious\.himiko\.cloud',
473 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 474 r'(?:www\.)?invidious\.tube',
475 r'(?:www\.)?invidiou\.site',
476 r'(?:www\.)?invidious\.site',
477 r'(?:www\.)?invidious\.xyz',
478 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 479 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 480 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 481 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 482 r'(?:www\.)?tube\.poal\.co',
483 r'(?:www\.)?tube\.connect\.cafe',
484 r'(?:www\.)?vid\.wxzm\.sx',
485 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 486 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 487 r'(?:www\.)?yewtu\.be',
488 r'(?:www\.)?yt\.elukerio\.org',
489 r'(?:www\.)?yt\.lelux\.fi',
490 r'(?:www\.)?invidious\.ggc-project\.de',
491 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 492 r'(?:www\.)?ytprivate\.com',
493 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 494 r'(?:www\.)?invidious\.toot\.koeln',
495 r'(?:www\.)?invidious\.fdn\.fr',
496 r'(?:www\.)?watch\.nettohikari\.com',
497 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
498 r'(?:www\.)?qklhadlycap4cnod\.onion',
499 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
500 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
501 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
502 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
503 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
504 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
505 )
cb7dfeea 506 _VALID_URL = r"""(?x)^
c5e8d7af 507 (
edb53e2d 508 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 509 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
510 (?:www\.)?deturl\.com/www\.youtube\.com|
511 (?:www\.)?pwnyoutube\.com|
512 (?:www\.)?hooktube\.com|
513 (?:www\.)?yourepeat\.com|
514 tube\.majestyc\.net|
515 %(invidious)s|
516 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
517 (?:.*?\#/)? # handle anchor (#/) redirect urls
518 (?: # the various things that can precede the ID:
ac7553d0 519 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 520 |(?: # or the v= param in all its forms
f7000f3a 521 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 522 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 523 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
524 v=
525 )
f4b05232 526 ))
cbaed4bb
S
527 |(?:
528 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
529 vid\.plus| # or vid.plus/xxxx
530 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 531 %(invidious)s
cbaed4bb 532 )/
edb53e2d 533 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 534 )
c5e8d7af 535 )? # all until now is optional -> you can pass the naked ID
201c1459 536 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 537 (?(1).+)? # if we found the ID, everything can follow
9297939e 538 (?:\#|$)""" % {
bc2ca1bb 539 'invidious': '|'.join(_INVIDIOUS_SITES),
540 }
e40c758c 541 _PLAYER_INFO_RE = (
cc2db878 542 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
543 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 544 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 545 )
2c62dc26 546 _formats = {
c2d3cb4c 547 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
548 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
549 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
550 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
551 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
552 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
553 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
554 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 555 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 556 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
557 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
558 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
559 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
560 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
561 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 562 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 563 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
564 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 565
566
567 # 3D videos
c2d3cb4c 568 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
569 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
570 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
571 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 572 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
573 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
574 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 575
96fb5605 576 # Apple HTTP Live Streaming
11f12195 577 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 578 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
579 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
580 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
581 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
582 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 583 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
584 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
585
586 # DASH mp4 video
d23028a8
S
587 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
588 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
589 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
590 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
591 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 592 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
593 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
594 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
595 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
596 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
597 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
598 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 599
f6f1fc92 600 # Dash mp4 audio
d23028a8
S
601 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
602 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
603 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
604 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
605 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
606 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
607 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
608
609 # Dash webm
d23028a8
S
610 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
611 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
612 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
613 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
614 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
615 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
616 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
617 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
618 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
619 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
620 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
621 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
622 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
623 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
624 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 625 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
626 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
627 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
628 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
629 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
630 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
631 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
632
633 # Dash webm audio
d23028a8
S
634 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
635 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 636
0857baad 637 # Dash webm audio with opus inside
d23028a8
S
638 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
639 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
640 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 641
ce6b9a2d
PH
642 # RTMP (unnamed)
643 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
644
645 # av01 video only formats sometimes served with "unknown" codecs
646 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
647 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
648 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
649 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 650 }
29f7c58a 651 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 652
fd5c4aab
S
653 _GEO_BYPASS = False
654
78caa52a 655 IE_NAME = 'youtube'
2eb88d95
PH
656 _TESTS = [
657 {
2d3d2997 658 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
659 'info_dict': {
660 'id': 'BaW_jenozKc',
661 'ext': 'mp4',
3867038a 662 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
663 'uploader': 'Philipp Hagemeister',
664 'uploader_id': 'phihag',
ec85ded8 665 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
666 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
667 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 668 'upload_date': '20121002',
3867038a 669 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 670 'categories': ['Science & Technology'],
3867038a 671 'tags': ['youtube-dl'],
556dbe7f 672 'duration': 10,
dbdaaa23 673 'view_count': int,
3e7c1224
PH
674 'like_count': int,
675 'dislike_count': int,
7c80519c 676 'start_time': 1,
297a564b 677 'end_time': 9,
2eb88d95 678 }
0e853ca4 679 },
fccd3771 680 {
4bc3a23e
PH
681 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
682 'note': 'Embed-only video (#1746)',
683 'info_dict': {
684 'id': 'yZIXLfi8CZQ',
685 'ext': 'mp4',
686 'upload_date': '20120608',
687 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
688 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
689 'uploader': 'SET India',
94bfcd23 690 'uploader_id': 'setindia',
ec85ded8 691 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 692 'age_limit': 18,
545cc85d 693 },
694 'skip': 'Private video',
fccd3771 695 },
11b56058 696 {
8bdd16b4 697 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
698 'note': 'Use the first video ID in the URL',
699 'info_dict': {
700 'id': 'BaW_jenozKc',
701 'ext': 'mp4',
3867038a 702 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
703 'uploader': 'Philipp Hagemeister',
704 'uploader_id': 'phihag',
ec85ded8 705 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 706 'upload_date': '20121002',
3867038a 707 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 708 'categories': ['Science & Technology'],
3867038a 709 'tags': ['youtube-dl'],
556dbe7f 710 'duration': 10,
dbdaaa23 711 'view_count': int,
11b56058
PM
712 'like_count': int,
713 'dislike_count': int,
34a7de29
S
714 },
715 'params': {
716 'skip_download': True,
717 },
11b56058 718 },
dd27fd17 719 {
2d3d2997 720 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
721 'note': '256k DASH audio (format 141) via DASH manifest',
722 'info_dict': {
723 'id': 'a9LDPn-MO4I',
724 'ext': 'm4a',
725 'upload_date': '20121002',
726 'uploader_id': '8KVIDEO',
ec85ded8 727 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
728 'description': '',
729 'uploader': '8KVIDEO',
730 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 731 },
4bc3a23e
PH
732 'params': {
733 'youtube_include_dash_manifest': True,
734 'format': '141',
4919603f 735 },
de3c7fe0 736 'skip': 'format 141 not served anymore',
dd27fd17 737 },
8bdd16b4 738 # DASH manifest with encrypted signature
739 {
740 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
741 'info_dict': {
742 'id': 'IB3lcPjvWLA',
743 'ext': 'm4a',
744 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
745 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
746 'duration': 244,
747 'uploader': 'AfrojackVEVO',
748 'uploader_id': 'AfrojackVEVO',
749 'upload_date': '20131011',
cc2db878 750 'abr': 129.495,
8bdd16b4 751 },
752 'params': {
753 'youtube_include_dash_manifest': True,
754 'format': '141/bestaudio[ext=m4a]',
755 },
756 },
aa79ac0c
PH
757 # Controversy video
758 {
759 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
760 'info_dict': {
761 'id': 'T4XJQO3qol8',
762 'ext': 'mp4',
556dbe7f 763 'duration': 219,
aa79ac0c 764 'upload_date': '20100909',
4fe54c12 765 'uploader': 'Amazing Atheist',
aa79ac0c 766 'uploader_id': 'TheAmazingAtheist',
ec85ded8 767 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 768 'title': 'Burning Everyone\'s Koran',
545cc85d 769 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 770 }
c522adb1 771 },
dd2d55f1 772 # Normal age-gate video (embed allowed)
c522adb1 773 {
2d3d2997 774 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
775 'info_dict': {
776 'id': 'HtVdAasjOgU',
777 'ext': 'mp4',
778 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 779 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 780 'duration': 142,
c522adb1
JMF
781 'uploader': 'The Witcher',
782 'uploader_id': 'WitcherGame',
ec85ded8 783 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 784 'upload_date': '20140605',
34952f09 785 'age_limit': 18,
c522adb1
JMF
786 },
787 },
8bdd16b4 788 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
789 # YouTube Red ad is not captured for creator
790 {
791 'url': '__2ABJjxzNo',
792 'info_dict': {
793 'id': '__2ABJjxzNo',
794 'ext': 'mp4',
795 'duration': 266,
796 'upload_date': '20100430',
797 'uploader_id': 'deadmau5',
798 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 799 'creator': 'deadmau5',
800 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 801 'uploader': 'deadmau5',
802 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 803 'alt_title': 'Some Chords',
8bdd16b4 804 },
805 'expected_warnings': [
806 'DASH manifest missing',
807 ]
808 },
067aa17e 809 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
810 {
811 'url': 'lqQg6PlCWgI',
812 'info_dict': {
813 'id': 'lqQg6PlCWgI',
814 'ext': 'mp4',
556dbe7f 815 'duration': 6085,
90227264 816 'upload_date': '20150827',
cbe2bd91 817 'uploader_id': 'olympic',
ec85ded8 818 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 819 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 820 'uploader': 'Olympic',
cbe2bd91
PH
821 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
822 },
823 'params': {
824 'skip_download': 'requires avconv',
e52a40ab 825 }
cbe2bd91 826 },
6271f1ca
PH
827 # Non-square pixels
828 {
829 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
830 'info_dict': {
831 'id': '_b-2C3KPAM0',
832 'ext': 'mp4',
833 'stretched_ratio': 16 / 9.,
556dbe7f 834 'duration': 85,
6271f1ca
PH
835 'upload_date': '20110310',
836 'uploader_id': 'AllenMeow',
ec85ded8 837 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 838 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 839 'uploader': '孫ᄋᄅ',
6271f1ca
PH
840 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
841 },
06b491eb
S
842 },
843 # url_encoded_fmt_stream_map is empty string
844 {
845 'url': 'qEJwOuvDf7I',
846 'info_dict': {
847 'id': 'qEJwOuvDf7I',
f57b7835 848 'ext': 'webm',
06b491eb
S
849 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
850 'description': '',
851 'upload_date': '20150404',
852 'uploader_id': 'spbelect',
853 'uploader': 'Наблюдатели Петербурга',
854 },
855 'params': {
856 'skip_download': 'requires avconv',
e323cf3f
S
857 },
858 'skip': 'This live event has ended.',
06b491eb 859 },
067aa17e 860 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
861 {
862 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
863 'info_dict': {
864 'id': 'FIl7x6_3R5Y',
eb6793ba 865 'ext': 'webm',
da77d856
S
866 'title': 'md5:7b81415841e02ecd4313668cde88737a',
867 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 868 'duration': 220,
da77d856
S
869 'upload_date': '20150625',
870 'uploader_id': 'dorappi2000',
ec85ded8 871 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 872 'uploader': 'dorappi2000',
eb6793ba 873 'formats': 'mincount:31',
da77d856 874 },
eb6793ba 875 'skip': 'not actual anymore',
2ee8f5d8 876 },
8a1a26ce
YCH
877 # DASH manifest with segment_list
878 {
879 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
880 'md5': '8ce563a1d667b599d21064e982ab9e31',
881 'info_dict': {
882 'id': 'CsmdDsKjzN8',
883 'ext': 'mp4',
17ee98e1 884 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
885 'uploader': 'Airtek',
886 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
887 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
888 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
889 },
890 'params': {
891 'youtube_include_dash_manifest': True,
892 'format': '135', # bestvideo
be49068d
S
893 },
894 'skip': 'This live event has ended.',
2ee8f5d8 895 },
cf7e015f
S
896 {
897 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 898 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 899 'info_dict': {
545cc85d 900 'id': 'jvGDaLqkpTg',
901 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
902 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
903 },
904 'playlist': [{
905 'info_dict': {
545cc85d 906 'id': 'jvGDaLqkpTg',
cf7e015f 907 'ext': 'mp4',
545cc85d 908 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
909 'description': 'md5:e03b909557865076822aa169218d6a5d',
910 'duration': 10643,
911 'upload_date': '20161111',
912 'uploader': 'Team PGP',
913 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
914 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
915 },
916 }, {
917 'info_dict': {
545cc85d 918 'id': '3AKt1R1aDnw',
cf7e015f 919 'ext': 'mp4',
545cc85d 920 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
921 'description': 'md5:e03b909557865076822aa169218d6a5d',
922 'duration': 10991,
923 'upload_date': '20161111',
924 'uploader': 'Team PGP',
925 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
926 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
927 },
928 }, {
929 'info_dict': {
545cc85d 930 'id': 'RtAMM00gpVc',
cf7e015f 931 'ext': 'mp4',
545cc85d 932 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
933 'description': 'md5:e03b909557865076822aa169218d6a5d',
934 'duration': 10995,
935 'upload_date': '20161111',
936 'uploader': 'Team PGP',
937 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
938 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
939 },
940 }, {
941 'info_dict': {
545cc85d 942 'id': '6N2fdlP3C5U',
cf7e015f 943 'ext': 'mp4',
545cc85d 944 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
945 'description': 'md5:e03b909557865076822aa169218d6a5d',
946 'duration': 10990,
947 'upload_date': '20161111',
948 'uploader': 'Team PGP',
949 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
950 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
951 },
952 }],
953 'params': {
954 'skip_download': True,
955 },
cbaed4bb 956 },
f9f49d87 957 {
067aa17e 958 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
959 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
960 'info_dict': {
961 'id': 'gVfLd0zydlo',
962 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
963 },
964 'playlist_count': 2,
be49068d 965 'skip': 'Not multifeed anymore',
f9f49d87 966 },
cbaed4bb 967 {
2d3d2997 968 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 969 'only_matching': True,
0e49d9a6 970 },
6d4fc66b 971 {
2d3d2997 972 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
973 'only_matching': True,
974 },
0e49d9a6 975 {
067aa17e 976 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 977 # Also tests cut-off URL expansion in video description (see
067aa17e
S
978 # https://github.com/ytdl-org/youtube-dl/issues/1892,
979 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
980 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
981 'info_dict': {
982 'id': 'lsguqyKfVQg',
983 'ext': 'mp4',
984 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 985 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 986 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 987 'duration': 133,
0e49d9a6
LL
988 'upload_date': '20151119',
989 'uploader_id': 'IronSoulElf',
ec85ded8 990 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 991 'uploader': 'IronSoulElf',
eb6793ba
S
992 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
993 'track': 'Dark Walk - Position Music',
994 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 995 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
996 },
997 'params': {
998 'skip_download': True,
999 },
1000 },
61f92af1 1001 {
067aa17e 1002 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1003 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1004 'only_matching': True,
1005 },
313dfc45
LL
1006 {
1007 # Video with yt:stretch=17:0
1008 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1009 'info_dict': {
1010 'id': 'Q39EVAstoRM',
1011 'ext': 'mp4',
1012 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1013 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1014 'upload_date': '20151107',
1015 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1016 'uploader': 'CH GAMER DROID',
1017 },
1018 'params': {
1019 'skip_download': True,
1020 },
be49068d 1021 'skip': 'This video does not exist.',
313dfc45 1022 },
201c1459 1023 {
1024 # Video with incomplete 'yt:stretch=16:'
1025 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1026 'only_matching': True,
1027 },
7caf9830
S
1028 {
1029 # Video licensed under Creative Commons
1030 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1031 'info_dict': {
1032 'id': 'M4gD1WSo5mA',
1033 'ext': 'mp4',
1034 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1035 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1036 'duration': 721,
7caf9830
S
1037 'upload_date': '20150127',
1038 'uploader_id': 'BerkmanCenter',
ec85ded8 1039 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1040 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1041 'license': 'Creative Commons Attribution license (reuse allowed)',
1042 },
1043 'params': {
1044 'skip_download': True,
1045 },
1046 },
fd050249
S
1047 {
1048 # Channel-like uploader_url
1049 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1050 'info_dict': {
1051 'id': 'eQcmzGIKrzg',
1052 'ext': 'mp4',
1053 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1054 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1055 'duration': 4060,
fd050249 1056 'upload_date': '20151119',
eb6793ba 1057 'uploader': 'Bernie Sanders',
fd050249 1058 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1059 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1060 'license': 'Creative Commons Attribution license (reuse allowed)',
1061 },
1062 'params': {
1063 'skip_download': True,
1064 },
1065 },
040ac686
S
1066 {
1067 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1068 'only_matching': True,
7f29cf54
S
1069 },
1070 {
067aa17e 1071 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1072 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1073 'only_matching': True,
6496ccb4
S
1074 },
1075 {
1076 # Rental video preview
1077 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1078 'info_dict': {
1079 'id': 'uGpuVWrhIzE',
1080 'ext': 'mp4',
1081 'title': 'Piku - Trailer',
1082 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1083 'upload_date': '20150811',
1084 'uploader': 'FlixMatrix',
1085 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1086 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1087 'license': 'Standard YouTube License',
1088 },
1089 'params': {
1090 'skip_download': True,
1091 },
eb6793ba 1092 'skip': 'This video is not available.',
022a5d66 1093 },
12afdc2a
S
1094 {
1095 # YouTube Red video with episode data
1096 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1097 'info_dict': {
1098 'id': 'iqKdEhx-dD4',
1099 'ext': 'mp4',
1100 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1101 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1102 'duration': 2085,
12afdc2a
S
1103 'upload_date': '20170118',
1104 'uploader': 'Vsauce',
1105 'uploader_id': 'Vsauce',
1106 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1107 'series': 'Mind Field',
1108 'season_number': 1,
1109 'episode_number': 1,
1110 },
1111 'params': {
1112 'skip_download': True,
1113 },
1114 'expected_warnings': [
1115 'Skipping DASH manifest',
1116 ],
1117 },
c7121fa7
S
1118 {
1119 # The following content has been identified by the YouTube community
1120 # as inappropriate or offensive to some audiences.
1121 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1122 'info_dict': {
1123 'id': '6SJNVb0GnPI',
1124 'ext': 'mp4',
1125 'title': 'Race Differences in Intelligence',
1126 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1127 'duration': 965,
1128 'upload_date': '20140124',
1129 'uploader': 'New Century Foundation',
1130 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1131 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1132 },
1133 'params': {
1134 'skip_download': True,
1135 },
545cc85d 1136 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1137 },
022a5d66
S
1138 {
1139 # itag 212
1140 'url': '1t24XAntNCY',
1141 'only_matching': True,
fd5c4aab
S
1142 },
1143 {
1144 # geo restricted to JP
1145 'url': 'sJL6WA-aGkQ',
1146 'only_matching': True,
1147 },
cd5a74a2
S
1148 {
1149 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1150 'only_matching': True,
1151 },
bc2ca1bb 1152 {
1153 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1154 'only_matching': True,
1155 },
1156 {
1157 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1158 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1159 'only_matching': True,
1160 },
825cd268
RA
1161 {
1162 # DRM protected
1163 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1164 'only_matching': True,
4fe54c12
S
1165 },
1166 {
1167 # Video with unsupported adaptive stream type formats
1168 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1169 'info_dict': {
1170 'id': 'Z4Vy8R84T1U',
1171 'ext': 'mp4',
1172 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1173 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1174 'duration': 433,
1175 'upload_date': '20130923',
1176 'uploader': 'Amelia Putri Harwita',
1177 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1178 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1179 'formats': 'maxcount:10',
1180 },
1181 'params': {
1182 'skip_download': True,
1183 'youtube_include_dash_manifest': False,
1184 },
5429d6a9 1185 'skip': 'not actual anymore',
5caabd3c 1186 },
1187 {
822b9d9c 1188 # Youtube Music Auto-generated description
5caabd3c 1189 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1190 'info_dict': {
1191 'id': 'MgNrAu2pzNs',
1192 'ext': 'mp4',
1193 'title': 'Voyeur Girl',
1194 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1195 'upload_date': '20190312',
5429d6a9
S
1196 'uploader': 'Stephen - Topic',
1197 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1198 'artist': 'Stephen',
1199 'track': 'Voyeur Girl',
1200 'album': 'it\'s too much love to know my dear',
1201 'release_date': '20190313',
1202 'release_year': 2019,
1203 },
1204 'params': {
1205 'skip_download': True,
1206 },
1207 },
66b48727
RA
1208 {
1209 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1210 'only_matching': True,
1211 },
011e75e6
S
1212 {
1213 # invalid -> valid video id redirection
1214 'url': 'DJztXj2GPfl',
1215 'info_dict': {
1216 'id': 'DJztXj2GPfk',
1217 'ext': 'mp4',
1218 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1219 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1220 'upload_date': '20090125',
1221 'uploader': 'Prochorowka',
1222 'uploader_id': 'Prochorowka',
1223 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1224 'artist': 'Panjabi MC',
1225 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1226 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1227 },
1228 'params': {
1229 'skip_download': True,
1230 },
545cc85d 1231 'skip': 'Video unavailable',
ea74e00b
DP
1232 },
1233 {
1234 # empty description results in an empty string
1235 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1236 'info_dict': {
1237 'id': 'x41yOUIvK2k',
1238 'ext': 'mp4',
1239 'title': 'IMG 3456',
1240 'description': '',
1241 'upload_date': '20170613',
1242 'uploader_id': 'ElevageOrVert',
1243 'uploader': 'ElevageOrVert',
1244 },
1245 'params': {
1246 'skip_download': True,
1247 },
1248 },
a0566bbf 1249 {
29f7c58a 1250 # with '};' inside yt initial data (see [1])
1251 # see [2] for an example with '};' inside ytInitialPlayerResponse
1252 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1253 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1254 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1255 'info_dict': {
1256 'id': 'CHqg6qOn4no',
1257 'ext': 'mp4',
1258 'title': 'Part 77 Sort a list of simple types in c#',
1259 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1260 'upload_date': '20130831',
1261 'uploader_id': 'kudvenkat',
1262 'uploader': 'kudvenkat',
1263 },
1264 'params': {
1265 'skip_download': True,
1266 },
1267 },
29f7c58a 1268 {
1269 # another example of '};' in ytInitialData
1270 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1271 'only_matching': True,
1272 },
1273 {
1274 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1275 'only_matching': True,
1276 },
545cc85d 1277 {
cc2db878 1278 # https://github.com/ytdl-org/youtube-dl/pull/28094
1279 'url': 'OtqTfy26tG0',
1280 'info_dict': {
1281 'id': 'OtqTfy26tG0',
1282 'ext': 'mp4',
1283 'title': 'Burn Out',
1284 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1285 'upload_date': '20141120',
1286 'uploader': 'The Cinematic Orchestra - Topic',
1287 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1288 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1289 'artist': 'The Cinematic Orchestra',
1290 'track': 'Burn Out',
1291 'album': 'Every Day',
1292 'release_data': None,
1293 'release_year': None,
1294 },
1295 'params': {
1296 'skip_download': True,
1297 },
545cc85d 1298 },
bc2ca1bb 1299 {
1300 # controversial video, only works with bpctr when authenticated with cookies
1301 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1302 'only_matching': True,
1303 },
f7ad7160 1304 {
1305 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1306 'url': 'cBvYw8_A0vQ',
1307 'info_dict': {
1308 'id': 'cBvYw8_A0vQ',
1309 'ext': 'mp4',
1310 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1311 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1312 'upload_date': '20201120',
1313 'uploader': 'Walk around Japan',
1314 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1315 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1316 },
1317 'params': {
1318 'skip_download': True,
1319 },
0fb983f6 1320 }, {
1321 # Has multiple audio streams
1322 'url': 'WaOKSUlf4TM',
1323 'only_matching': True
9297939e 1324 }, {
1325 # Requires Premium: has format 141 when requested using YTM url
1326 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1327 'only_matching': True
1328 }, {
120916da 1329 # multiple subtitles with same lang_code
1330 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1331 'only_matching': True,
1332 },
2eb88d95
PH
1333 ]
1334
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is judged by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1344
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _player_cache: (player URL, signature layout) -> signature function
    # _code_cache: player id -> downloaded player code
    self._player_cache = {}
    self._code_cache = {}
e0df6211 1349
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return a string representation of a signature.

    The result is the length of every dot-separated part of
    *example_sig*, joined with dots again (e.g. 'abc.de' -> '3.2').
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1353
e40c758c
S
1354 @classmethod
1355 def _extract_player_info(cls, player_url):
1356 for player_re in cls._PLAYER_INFO_RE:
1357 id_m = re.search(player_re, player_url)
1358 if id_m:
1359 break
1360 else:
c081b35c 1361 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1362 return id_m.group('id')
e40c758c
S
1363
1364 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1365 player_id = self._extract_player_info(player_url)
e0df6211 1366
c4417ddb 1367 # Read from filesystem cache
545cc85d 1368 func_id = 'js_%s_%s' % (
1369 player_id, self._signature_cache_id(example_sig))
c4417ddb 1370 assert os.path.basename(func_id) == func_id
a0e07d31 1371
69ea8ca4 1372 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1373 if cache_spec is not None:
78caa52a 1374 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1375
545cc85d 1376 if player_id not in self._code_cache:
1377 self._code_cache[player_id] = self._download_webpage(
e0df6211 1378 player_url, video_id,
545cc85d 1379 note='Downloading player ' + player_id,
69ea8ca4 1380 errnote='Download of %s failed' % player_url)
545cc85d 1381 code = self._code_cache[player_id]
1382 res = self._parse_sig_js(code)
e0df6211 1383
785521bf
PH
1384 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1385 cache_res = res(test_string)
1386 cache_spec = [ord(c) for c in cache_res]
83799698 1387
69ea8ca4 1388 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1389 return res
1390
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    *func* is applied to a probe string whose n-th character has code
    point n; the output therefore spells out which input index ends up at
    each output position. That index list is compressed into a sum of
    slice/index expressions on ``s`` and shown via ``self.to_screen``,
    guarded by a check on the part lengths of *example_sig*.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            # Render the run start..end (inclusive, moving by step) as a slice
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        if not idxs:
            # Empty permutation: there is nothing to emit
            return
        # Seed the "last seen index" so that a single-element permutation
        # yields s[N] instead of raising UnboundLocalError after the loop
        i = idxs[0]
        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Still inside the current run
                    continue
                # Run ended at prev; i starts over on the next iteration
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # Two adjacent indexes open a new ascending/descending run
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1429
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build the signature function from the player JavaScript *jscode*.

    The name of the signature function is located with the patterns below
    (tried in order by ``self._search_regex``), the function is extracted
    with JSInterpreter, and a one-argument callable wrapping it is returned.
    """
    funcname_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # NOTE(review): identical to the previous pattern
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    sig_funcname = self._search_regex(
        funcname_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    sig_function = JSInterpreter(jscode).extract_function(sig_funcname)

    def run_sig_function(s):
        # The extracted function takes its arguments as a list
        return sig_function([s])

    return run_sig_function
1453
545cc85d 1454 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1455 """Turn the encrypted s field into a working signature"""
6b37f0be 1456
c8bf86d5 1457 if player_url is None:
69ea8ca4 1458 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1459
69ea8ca4 1460 if player_url.startswith('//'):
78caa52a 1461 player_url = 'https:' + player_url
3c90cc8b
S
1462 elif not re.match(r'https?://', player_url):
1463 player_url = compat_urlparse.urljoin(
1464 'https://www.youtube.com', player_url)
c8bf86d5 1465 try:
62af3a0e 1466 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1467 if player_id not in self._player_cache:
1468 func = self._extract_signature_function(
60064c53 1469 video_id, player_url, s
c8bf86d5
PH
1470 )
1471 self._player_cache[player_id] = func
1472 func = self._player_cache[player_id]
a06916d9 1473 if self.get_param('youtube_print_sig_code'):
60064c53 1474 self._print_sig_code(func, s)
c8bf86d5
PH
1475 return func(s)
1476 except Exception as e:
1477 tb = traceback.format_exc()
1478 raise ExtractorError(
78caa52a 1479 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1480
def _mark_watched(self, video_id, player_response):
    """Request the playback tracking URL from *player_response*.

    The URL is read from
    playbackTracking -> videostatsPlaybackUrl -> baseUrl; nothing happens
    when it is missing or not a URL. ``ver`` and a random ``cpn`` are set in
    the query string before the URL is requested. The request is non-fatal.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # random.randint includes both bounds: draw from the 256 byte values
    # 0..255 so that "& 63" picks every alphabet character equally often
    cpn = ''.join(
        CPN_ALPHABET[random.randint(0, 255) & 63] for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    # doseq=True because parse_qs yields a list of values per key
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1505
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embeds found in the HTML of *webpage*.

    The result is a list holding, in this order: HTML-unescaped player URLs
    (iframe/embed/object/SWFObject style embeds), HTML-unescaped video ids
    of lazyYT embeds, and video ids of the Wordpress "YouTube Video
    Importer" plugin (not unescaped).
    """
    # Embedded YouTube player
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(video_id)
        for video_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the last group is the video id
    entries.extend(m[-1] for m in matches)

    return entries
1537
@staticmethod
def _extract_url(webpage):
    """Return the first embed reported by YoutubeIE._extract_urls, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1542
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id (second group of cls._VALID_URL) for url.

    Raises ExtractorError when url does not match cls._VALID_URL.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1550
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a chapter list from the chaptered player bar in data.

    Each chapter ends where the next one starts; the last chapter ends at
    duration. Chapters whose start or end time cannot be determined are
    left out. Returns None when data carries no chapter list, otherwise a
    list of dicts with 'start_time', 'end_time' and 'title'.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    # Start of every chapter in seconds (the source value is in milliseconds)
    starts = [
        float_or_none(
            try_get(
                raw,
                lambda x: x['chapterRenderer']['timeRangeStartMillis'],
                int),
            scale=1000)
        for raw in raw_chapters]
    # A chapter's end is its successor's start; the final one uses duration
    ends = starts[1:] + [duration]

    result = []
    for raw, begin, finish in zip(raw_chapters, starts, ends):
        if begin is None or finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                raw, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1590
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find the JSON blob matched by regex in webpage and parse it.

    The regex is first tried with self._YT_INITIAL_BOUNDARY_RE appended and
    then on its own. When nothing matches, '{}' is parsed instead; parsing
    is non-fatal.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1595
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    The first two space-separated words (amount and unit) are passed to
    datetime_from_str as 'now-<amount><unit>'. Returns None when time_text
    has fewer than three words.
    """
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    amount, unit = words[0], words[1]
    return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
1605
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the 'text' value of every run in runs.

    Entries that are not dicts, or whose 'text' is missing, empty or not a
    string, are ignored. Returns None when no text was found at all.
    """
    pieces = []
    for run in runs:
        if isinstance(run, dict):
            piece = try_get(run, lambda r: r['text'], compat_str)
            if piece:
                pieces.append(piece)
    return ''.join(pieces) if pieces else None
1619
def _extract_comment(self, comment_renderer, parent=None):
    """Turn a single commentRenderer dict into a flat comment dict.

    parent is the id of the comment being replied to; top-level comments
    (parent is None/empty) are reported with parent 'root'.

    Returns None when the renderer has no 'commentId'. 'timestamp' is None
    when the published-time text is missing or cannot be parsed.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)
    # The timestamp is derived from relative text such as "2 days ago".
    # time_text is None when there are no runs, and parse_time_text returns
    # None for text it does not understand; neither case may crash here.
    timestamp = None
    time_text_dt = self.parse_time_text(time_text) if time_text else None
    if time_text_dt is not None:
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                  lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_liked,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
1652
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generator over the comments reachable from root_continuation_data.

    Yields comment dicts as produced by _extract_comment. For a top-level
    call (parent is None) it may additionally yield one int: the estimated
    total number of comments, read from the comments header.

    session_token_list is a one-element list holding the xsrf token; it is
    updated in place whenever a response carries a new token, so recursive
    calls for replies share it. comment_counts is a shared three-element
    list: [comments so far, estimated total, current thread number].

    Each continuation page is retried up to the 'extractor_retries'
    parameter (default 3) on HTTP 500/503/404 or incomplete data; HTTP 413
    is treated as the end of the comments.
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # NB: dict is the expected type of the result. It was previously
            # passed inside the getter tuple, where it acted as a second
            # getter instead of a type filter.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self.get_param('extractor_retries', 3)
        count = -1
        last_error = None

        # The loop is left only via break (usable response), return (HTTP 413)
        # or an exception, so response is always bound afterwards.
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    estimated_count = str_to_int(expected_comment_count)
                    # Only report and yield the estimate when the text parsed
                    # to a number: '%d' cannot format None, and a yielded
                    # None would be mistaken for a comment by the consumer.
                    if estimated_count is not None:
                        comment_counts[1] = estimated_count
                        self.to_screen('Downloading ~%d comments' % estimated_count)
                        yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    # Restart from the sorted continuation; the comments of
                    # this (unsorted) page are deliberately not extracted.
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
1837
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
    """Entry for comment extraction

    Walks contents, runs _comment_entries on the first recognised comment
    renderer of every entry and gathers the yielded comments. Integers
    yielded by _comment_entries are taken as the estimated total and are
    only used for the progress message.

    Returns a dict with 'comments' (list) and 'comment_count' (its length).
    """
    supported_renderers = (
        'itemSectionRenderer',
    )
    collected = []
    total_estimate = 0
    for entry in contents:
        candidates = [
            value for name, value in entry.items()
            if name in supported_renderers]
        if not candidates:
            continue

        # Only the first recognised renderer of an entry is processed
        entries_iter = self._comment_entries(
            candidates[0],
            identity_token=self._extract_identity_token(webpage, item_id=video_id),
            account_syncid=self._extract_account_syncid(ytcfg),
            ytcfg=ytcfg,
            session_token_list=[xsrf_token])

        for item in entries_iter:
            if isinstance(item, int):
                total_estimate = item
            else:
                collected.append(item)

    self.to_screen('Downloaded %d/%d comments' % (len(collected), total_estimate))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
1868
c5e8d7af 1869 def _real_extract(self, url):
cf7e015f 1870 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1871 video_id = self._match_id(url)
9297939e 1872
1873 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
1874
545cc85d 1875 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1876 webpage_url = base_url + 'watch?v=' + video_id
1877 webpage = self._download_webpage(
cce889b9 1878 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1879
9297939e 1880 def get_text(x):
1881 if not x:
1882 return
1883 text = x.get('simpleText')
1884 if text and isinstance(text, compat_str):
1885 return text
1886 runs = x.get('runs')
1887 if not isinstance(runs, list):
1888 return
1889 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
1890
1891 ytm_streaming_data = {}
1892 if is_music_url:
1893 # we are forcing to use parse_json because 141 only appeared in get_video_info.
1894 # el, c, cver, cplayer field required for 141(aac 256kbps) codec
1895 # maybe paramter of youtube music player?
1896 ytm_player_response = self._parse_json(try_get(compat_parse_qs(
1897 self._download_webpage(
1898 base_url + 'get_video_info', video_id,
fe03a6cd 1899 'Fetching youtube music info webpage',
1900 'unable to download youtube music info webpage', query={
9297939e 1901 'video_id': video_id,
1902 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1903 'el': 'detailpage',
1904 'c': 'WEB_REMIX',
1905 'cver': '0.1',
1906 'cplayer': 'UNIPLAYER'
1907 }, fatal=False)),
1908 lambda x: x['player_response'][0],
1909 compat_str) or '{}', video_id)
1910 ytm_streaming_data = ytm_player_response.get('streamingData') or {}
1911
545cc85d 1912 player_response = None
1913 if webpage:
1914 player_response = self._extract_yt_initial_variable(
1915 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1916 video_id, 'initial player response')
f4f751af 1917
1918 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1919 if not player_response:
1920 player_response = self._call_api(
f4f751af 1921 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1922
1923 playability_status = player_response.get('playabilityStatus') or {}
1924 if playability_status.get('reason') == 'Sign in to confirm your age':
1925 pr = self._parse_json(try_get(compat_parse_qs(
1926 self._download_webpage(
1927 base_url + 'get_video_info', video_id,
1928 'Refetching age-gated info webpage',
1929 'unable to download video info webpage', query={
1930 'video_id': video_id,
7c60c33e 1931 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1932 }, fatal=False)),
1933 lambda x: x['player_response'][0],
1934 compat_str) or '{}', video_id)
1935 if pr:
1936 player_response = pr
1937
1938 trailer_video_id = try_get(
1939 playability_status,
1940 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1941 compat_str)
1942 if trailer_video_id:
1943 return self.url_result(
1944 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1945
545cc85d 1946 search_meta = (
1947 lambda x: self._html_search_meta(x, webpage, default=None)) \
1948 if webpage else lambda x: None
dbdaaa23 1949
545cc85d 1950 video_details = player_response.get('videoDetails') or {}
37357d21 1951 microformat = try_get(
545cc85d 1952 player_response,
1953 lambda x: x['microformat']['playerMicroformatRenderer'],
1954 dict) or {}
1955 video_title = video_details.get('title') \
1956 or get_text(microformat.get('title')) \
1957 or search_meta(['og:title', 'twitter:title', 'title'])
1958 video_description = video_details.get('shortDescription')
cf7e015f 1959
8fe10494 1960 if not smuggled_data.get('force_singlefeed', False):
a06916d9 1961 if not self.get_param('noplaylist'):
8fe10494
S
1962 multifeed_metadata_list = try_get(
1963 player_response,
1964 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1965 compat_str)
8fe10494
S
1966 if multifeed_metadata_list:
1967 entries = []
1968 feed_ids = []
1969 for feed in multifeed_metadata_list.split(','):
1970 # Unquote should take place before split on comma (,) since textual
1971 # fields may contain comma as well (see
067aa17e 1972 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1973 feed_data = compat_parse_qs(
1974 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1975
1976 def feed_entry(name):
545cc85d 1977 return try_get(
1978 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1979
1980 feed_id = feed_entry('id')
1981 if not feed_id:
1982 continue
1983 feed_title = feed_entry('title')
1984 title = video_title
1985 if feed_title:
1986 title += ' (%s)' % feed_title
8fe10494
S
1987 entries.append({
1988 '_type': 'url_transparent',
1989 'ie_key': 'Youtube',
1990 'url': smuggle_url(
545cc85d 1991 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1992 {'force_singlefeed': True}),
6b09401b 1993 'title': title,
8fe10494 1994 })
6b09401b 1995 feed_ids.append(feed_id)
8fe10494
S
1996 self.to_screen(
1997 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1998 % (', '.join(feed_ids), video_id))
545cc85d 1999 return self.playlist_result(
2000 entries, video_id, video_title, video_description)
8fe10494
S
2001 else:
2002 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2003
9297939e 2004 formats, itags, stream_ids = [], [], []
cc2db878 2005 itag_qualities = {}
545cc85d 2006 player_url = None
d3fc8074 2007 q = qualities([
2008 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2009 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2010 ])
9297939e 2011
545cc85d 2012 streaming_data = player_response.get('streamingData') or {}
2013 streaming_formats = streaming_data.get('formats') or []
2014 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2015 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2016 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2017
545cc85d 2018 for fmt in streaming_formats:
2019 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2020 continue
321bf820 2021
cc2db878 2022 itag = str_or_none(fmt.get('itag'))
9297939e 2023 audio_track = fmt.get('audioTrack') or {}
2024 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2025 if stream_id in stream_ids:
2026 continue
2027
cc2db878 2028 quality = fmt.get('quality')
d3fc8074 2029 if quality == 'tiny' or not quality:
2030 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2031 if itag and quality:
2032 itag_qualities[itag] = quality
2033 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2034 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2035 # number of fragment that would subsequently requested with (`&sq=N`)
2036 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2037 continue
2038
545cc85d 2039 fmt_url = fmt.get('url')
2040 if not fmt_url:
2041 sc = compat_parse_qs(fmt.get('signatureCipher'))
2042 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2043 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2044 if not (sc and fmt_url and encrypted_sig):
2045 continue
2046 if not player_url:
2047 if not webpage:
2048 continue
2049 player_url = self._search_regex(
2050 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
2051 webpage, 'player URL', fatal=False)
2052 if not player_url:
201e9eaa 2053 continue
545cc85d 2054 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2055 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2056 fmt_url += '&' + sp + '=' + signature
2057
545cc85d 2058 if itag:
2059 itags.append(itag)
9297939e 2060 stream_ids.append(stream_id)
2061
cc2db878 2062 tbr = float_or_none(
2063 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2064 dct = {
2065 'asr': int_or_none(fmt.get('audioSampleRate')),
2066 'filesize': int_or_none(fmt.get('contentLength')),
2067 'format_id': itag,
0fb983f6 2068 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2069 'fps': int_or_none(fmt.get('fps')),
2070 'height': int_or_none(fmt.get('height')),
dca3ff4a 2071 'quality': q(quality),
cc2db878 2072 'tbr': tbr,
545cc85d 2073 'url': fmt_url,
2074 'width': fmt.get('width'),
0fb983f6 2075 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2076 }
2077 mimetype = fmt.get('mimeType')
2078 if mimetype:
2079 mobj = re.match(
2080 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2081 if mobj:
2082 dct['ext'] = mimetype2ext(mobj.group(1))
2083 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2084 no_audio = dct.get('acodec') == 'none'
2085 no_video = dct.get('vcodec') == 'none'
2086 if no_audio:
2087 dct['vbr'] = tbr
2088 if no_video:
2089 dct['abr'] = tbr
2090 if no_audio or no_video:
545cc85d 2091 dct['downloader_options'] = {
2092 # Youtube throttles chunks >~10M
2093 'http_chunk_size': 10485760,
bf1317d2 2094 }
7c60c33e 2095 if dct.get('ext'):
2096 dct['container'] = dct['ext'] + '_dash'
545cc85d 2097 formats.append(dct)
2098
9297939e 2099 for sd in (streaming_data, ytm_streaming_data):
2100 hls_manifest_url = sd.get('hlsManifestUrl')
2101 if hls_manifest_url:
2102 for f in self._extract_m3u8_formats(
2103 hls_manifest_url, video_id, 'mp4', fatal=False):
2104 itag = self._search_regex(
2105 r'/itag/(\d+)', f['url'], 'itag', default=None)
2106 if itag:
2107 f['format_id'] = itag
545cc85d 2108 formats.append(f)
2109
a06916d9 2110 if self.get_param('youtube_include_dash_manifest', True):
9297939e 2111 for sd in (streaming_data, ytm_streaming_data):
2112 dash_manifest_url = sd.get('dashManifestUrl')
2113 if dash_manifest_url:
2114 for f in self._extract_mpd_formats(
2115 dash_manifest_url, video_id, fatal=False):
2116 itag = f['format_id']
2117 if itag in itags:
2118 continue
2119 if itag in itag_qualities:
9297939e 2120 f['quality'] = q(itag_qualities[itag])
2121 filesize = int_or_none(self._search_regex(
2122 r'/clen/(\d+)', f.get('fragment_base_url')
2123 or f['url'], 'file size', default=None))
2124 if filesize:
2125 f['filesize'] = filesize
2126 formats.append(f)
bf1317d2 2127
545cc85d 2128 if not formats:
a06916d9 2129 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2130 self.raise_no_formats(
545cc85d 2131 'This video is DRM protected.', expected=True)
2132 pemr = try_get(
2133 playability_status,
2134 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2135 dict) or {}
2136 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2137 subreason = pemr.get('subreason')
2138 if subreason:
2139 subreason = clean_html(get_text(subreason))
2140 if subreason == 'The uploader has not made this video available in your country.':
2141 countries = microformat.get('availableCountries')
2142 if not countries:
2143 regions_allowed = search_meta('regionsAllowed')
2144 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2145 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2146 reason += '\n' + subreason
2147 if reason:
b7da73eb 2148 self.raise_no_formats(reason, expected=True)
bf1317d2 2149
545cc85d 2150 self._sort_formats(formats)
bf1317d2 2151
545cc85d 2152 keywords = video_details.get('keywords') or []
2153 if not keywords and webpage:
2154 keywords = [
2155 unescapeHTML(m.group('content'))
2156 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2157 for keyword in keywords:
2158 if keyword.startswith('yt:stretch='):
201c1459 2159 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2160 if mobj:
2161 # NB: float is intentional for forcing float division
2162 w, h = (float(v) for v in mobj.groups())
2163 if w > 0 and h > 0:
2164 ratio = w / h
2165 for f in formats:
2166 if f.get('vcodec') != 'none':
2167 f['stretched_ratio'] = ratio
2168 break
6449cd80 2169
545cc85d 2170 thumbnails = []
2171 for container in (video_details, microformat):
2172 for thumbnail in (try_get(
2173 container,
2174 lambda x: x['thumbnail']['thumbnails'], list) or []):
2175 thumbnail_url = thumbnail.get('url')
2176 if not thumbnail_url:
bf1317d2 2177 continue
1988fab7 2178 # Sometimes youtube gives a wrong thumbnail URL. See:
2179 # https://github.com/yt-dlp/yt-dlp/issues/233
2180 # https://github.com/ytdl-org/youtube-dl/issues/28023
2181 if 'maxresdefault' in thumbnail_url:
2182 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2183 thumbnails.append({
545cc85d 2184 'url': thumbnail_url,
ff2751ac 2185 'height': int_or_none(thumbnail.get('height')),
545cc85d 2186 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2187 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2188 })
ff2751ac 2189 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2190 if thumbnail_url:
2191 thumbnails.append({
2192 'url': thumbnail_url,
2193 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2194 })
2195 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2196 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2197 thumbnails.append({
2198 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2199 'preference': 1,
2200 })
2201 self._remove_duplicate_formats(thumbnails)
545cc85d 2202
2203 category = microformat.get('category') or search_meta('genre')
2204 channel_id = video_details.get('channelId') \
2205 or microformat.get('externalChannelId') \
2206 or search_meta('channelId')
2207 duration = int_or_none(
2208 video_details.get('lengthSeconds')
2209 or microformat.get('lengthSeconds')) \
2210 or parse_duration(search_meta('duration'))
2211 is_live = video_details.get('isLive')
2212 owner_profile_url = microformat.get('ownerProfileUrl')
2213
2214 info = {
2215 'id': video_id,
2216 'title': self._live_title(video_title) if is_live else video_title,
2217 'formats': formats,
2218 'thumbnails': thumbnails,
2219 'description': video_description,
2220 'upload_date': unified_strdate(
2221 microformat.get('uploadDate')
2222 or search_meta('uploadDate')),
2223 'uploader': video_details['author'],
2224 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2225 'uploader_url': owner_profile_url,
2226 'channel_id': channel_id,
2227 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2228 'duration': duration,
2229 'view_count': int_or_none(
2230 video_details.get('viewCount')
2231 or microformat.get('viewCount')
2232 or search_meta('interactionCount')),
2233 'average_rating': float_or_none(video_details.get('averageRating')),
2234 'age_limit': 18 if (
2235 microformat.get('isFamilySafe') is False
2236 or search_meta('isFamilyFriendly') == 'false'
2237 or search_meta('og:restrictions:age') == '18+') else 0,
2238 'webpage_url': webpage_url,
2239 'categories': [category] if category else None,
2240 'tags': keywords,
2241 'is_live': is_live,
2242 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2243 'was_live': video_details.get('isLiveContent'),
545cc85d 2244 }
b477fc13 2245
545cc85d 2246 pctr = try_get(
2247 player_response,
2248 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2249 subtitles = {}
2250 if pctr:
774d79cc 2251 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2252 lang_subs = container.setdefault(lang_code, [])
545cc85d 2253 for fmt in self._SUBTITLE_FORMATS:
2254 query.update({
2255 'fmt': fmt,
2256 })
2257 lang_subs.append({
2258 'ext': fmt,
2259 'url': update_url_query(base_url, query),
774d79cc 2260 'name': sub_name,
545cc85d 2261 })
7e72694b 2262
545cc85d 2263 for caption_track in (pctr.get('captionTracks') or []):
2264 base_url = caption_track.get('baseUrl')
2265 if not base_url:
2266 continue
2267 if caption_track.get('kind') != 'asr':
120916da 2268 lang_code = (
2269 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2270 or caption_track.get('languageCode'))
545cc85d 2271 if not lang_code:
2272 continue
2273 process_language(
774d79cc 2274 subtitles, base_url, lang_code,
2275 try_get(caption_track, lambda x: x.get('name').get('simpleText')),
2276 {})
545cc85d 2277 continue
2278 automatic_captions = {}
2279 for translation_language in (pctr.get('translationLanguages') or []):
2280 translation_language_code = translation_language.get('languageCode')
2281 if not translation_language_code:
2282 continue
2283 process_language(
2284 automatic_captions, base_url, translation_language_code,
774d79cc 2285 try_get(translation_language, lambda x: x['languageName']['simpleText']),
545cc85d 2286 {'tlang': translation_language_code})
2287 info['automatic_captions'] = automatic_captions
2288 info['subtitles'] = subtitles
7e72694b 2289
545cc85d 2290 parsed_url = compat_urllib_parse_urlparse(url)
2291 for component in [parsed_url.fragment, parsed_url.query]:
2292 query = compat_parse_qs(component)
2293 for k, v in query.items():
2294 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2295 d_k += '_time'
2296 if d_k not in info and k in s_ks:
2297 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2298
2299 # Youtube Music Auto-generated description
822b9d9c 2300 if video_description:
38d70284 2301 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2302 if mobj:
822b9d9c
RA
2303 release_year = mobj.group('release_year')
2304 release_date = mobj.group('release_date')
2305 if release_date:
2306 release_date = release_date.replace('-', '')
2307 if not release_year:
545cc85d 2308 release_year = release_date[:4]
2309 info.update({
2310 'album': mobj.group('album'.strip()),
2311 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2312 'track': mobj.group('track').strip(),
2313 'release_date': release_date,
cc2db878 2314 'release_year': int_or_none(release_year),
545cc85d 2315 })
7e72694b 2316
545cc85d 2317 initial_data = None
2318 if webpage:
2319 initial_data = self._extract_yt_initial_variable(
2320 webpage, self._YT_INITIAL_DATA_RE, video_id,
2321 'yt initial data')
2322 if not initial_data:
2323 initial_data = self._call_api(
f4f751af 2324 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2325
2326 if not is_live:
2327 try:
2328 # This will error if there is no livechat
2329 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2330 info['subtitles']['live_chat'] = [{
394dcd44 2331 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2332 'video_id': video_id,
2333 'ext': 'json',
2334 'protocol': 'youtube_live_chat_replay',
2335 }]
2336 except (KeyError, IndexError, TypeError):
2337 pass
2338
2339 if initial_data:
2340 chapters = self._extract_chapters_from_json(
2341 initial_data, video_id, duration)
2342 if not chapters:
2343 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2344 contents = try_get(
2345 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2346 list)
2347 if not contents:
2348 continue
2349
2350 def chapter_time(mmlir):
2351 return parse_duration(
2352 get_text(mmlir.get('timeDescription')))
2353
2354 chapters = []
2355 for next_num, content in enumerate(contents, start=1):
2356 mmlir = content.get('macroMarkersListItemRenderer') or {}
2357 start_time = chapter_time(mmlir)
2358 end_time = chapter_time(try_get(
2359 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2360 if next_num < len(contents) else duration
2361 if start_time is None or end_time is None:
2362 continue
2363 chapters.append({
2364 'start_time': start_time,
2365 'end_time': end_time,
2366 'title': get_text(mmlir.get('title')),
2367 })
2368 if chapters:
2369 break
2370 if chapters:
2371 info['chapters'] = chapters
2372
2373 contents = try_get(
2374 initial_data,
2375 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2376 list) or []
2377 for content in contents:
2378 vpir = content.get('videoPrimaryInfoRenderer')
2379 if vpir:
2380 stl = vpir.get('superTitleLink')
2381 if stl:
2382 stl = get_text(stl)
2383 if try_get(
2384 vpir,
2385 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2386 info['location'] = stl
2387 else:
2388 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2389 if mobj:
2390 info.update({
2391 'series': mobj.group(1),
2392 'season_number': int(mobj.group(2)),
2393 'episode_number': int(mobj.group(3)),
2394 })
2395 for tlb in (try_get(
2396 vpir,
2397 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2398 list) or []):
2399 tbr = tlb.get('toggleButtonRenderer') or {}
2400 for getter, regex in [(
2401 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2402 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2403 lambda x: x['accessibility'],
2404 lambda x: x['accessibilityData']['accessibilityData'],
2405 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2406 label = (try_get(tbr, getter, dict) or {}).get('label')
2407 if label:
2408 mobj = re.match(regex, label)
2409 if mobj:
2410 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2411 break
2412 sbr_tooltip = try_get(
2413 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2414 if sbr_tooltip:
2415 like_count, dislike_count = sbr_tooltip.split(' / ')
2416 info.update({
2417 'like_count': str_to_int(like_count),
2418 'dislike_count': str_to_int(dislike_count),
2419 })
2420 vsir = content.get('videoSecondaryInfoRenderer')
2421 if vsir:
2422 info['channel'] = get_text(try_get(
2423 vsir,
2424 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2425 dict))
545cc85d 2426 rows = try_get(
2427 vsir,
2428 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2429 list) or []
2430 multiple_songs = False
2431 for row in rows:
2432 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2433 multiple_songs = True
2434 break
2435 for row in rows:
2436 mrr = row.get('metadataRowRenderer') or {}
2437 mrr_title = mrr.get('title')
2438 if not mrr_title:
2439 continue
2440 mrr_title = get_text(mrr['title'])
2441 mrr_contents_text = get_text(mrr['contents'][0])
2442 if mrr_title == 'License':
2443 info['license'] = mrr_contents_text
2444 elif not multiple_songs:
2445 if mrr_title == 'Album':
2446 info['album'] = mrr_contents_text
2447 elif mrr_title == 'Artist':
2448 info['artist'] = mrr_contents_text
2449 elif mrr_title == 'Song':
2450 info['track'] = mrr_contents_text
2451
2452 fallbacks = {
2453 'channel': 'uploader',
2454 'channel_id': 'uploader_id',
2455 'channel_url': 'uploader_url',
2456 }
2457 for to, frm in fallbacks.items():
2458 if not info.get(to):
2459 info[to] = info.get(frm)
2460
2461 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2462 v = info.get(s_k)
2463 if v:
2464 info[d_k] = v
b84071c0 2465
c224251a
M
2466 is_private = bool_or_none(video_details.get('isPrivate'))
2467 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2468 is_membersonly = None
b28f8d24 2469 is_premium = None
c224251a
M
2470 if initial_data and is_private is not None:
2471 is_membersonly = False
b28f8d24 2472 is_premium = False
c224251a
M
2473 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2474 for content in contents or []:
2475 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2476 for badge in badges or []:
2477 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2478 if label.lower() == 'members only':
2479 is_membersonly = True
2480 break
b28f8d24
M
2481 elif label.lower() == 'premium':
2482 is_premium = True
2483 break
2484 if is_membersonly or is_premium:
c224251a
M
2485 break
2486
2487 # TODO: Add this for playlists
2488 info['availability'] = self._availability(
2489 is_private=is_private,
b28f8d24 2490 needs_premium=is_premium,
c224251a
M
2491 needs_subscription=is_membersonly,
2492 needs_auth=info['age_limit'] >= 18,
2493 is_unlisted=None if is_private is None else is_unlisted)
2494
06167fbb 2495 # get xsrf for annotations or comments
a06916d9 2496 get_annotations = self.get_param('writeannotations', False)
2497 get_comments = self.get_param('getcomments', False)
06167fbb 2498 if get_annotations or get_comments:
29f7c58a 2499 xsrf_token = None
545cc85d 2500 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2501 if ytcfg:
2502 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2503 if not xsrf_token:
2504 xsrf_token = self._search_regex(
2505 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2506 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2507
2508 # annotations
06167fbb 2509 if get_annotations:
64b6a4e9
RA
2510 invideo_url = try_get(
2511 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2512 if xsrf_token and invideo_url:
29f7c58a 2513 xsrf_field_name = None
2514 if ytcfg:
2515 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2516 if not xsrf_field_name:
2517 xsrf_field_name = self._search_regex(
2518 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2519 webpage, 'xsrf field name',
29f7c58a 2520 group='xsrf_field_name', default='session_token')
8a784c74 2521 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2522 self._proto_relative_url(invideo_url),
2523 video_id, note='Downloading annotations',
2524 errnote='Unable to download video annotations', fatal=False,
2525 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2526
277d6ff5 2527 if get_comments:
a1c5d2ca 2528 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2529
545cc85d 2530 self.mark_watched(video_id, player_response)
d77ab8e2 2531
545cc85d 2532 return info
c5e8d7af 2533
5f6a1245 2534
8bdd16b4 2535class YoutubeTabIE(YoutubeBaseInfoExtractor):
2536 IE_DESC = 'YouTube.com tab'
70d5c17b 2537 _VALID_URL = r'''(?x)
2538 https?://
2539 (?:\w+\.)?
2540 (?:
2541 youtube(?:kids)?\.com|
2542 invidio\.us
2543 )/
2544 (?:
fe03a6cd 2545 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 2546 (?P<not_channel>
9ba5705a 2547 feed/|hashtag/|
70d5c17b 2548 (?:playlist|watch)\?.*?\blist=
2549 )|
29f7c58a 2550 (?!(?:%s)\b) # Direct URLs
70d5c17b 2551 )
2552 (?P<id>[^/?\#&]+)
2553 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2554 IE_NAME = 'youtube:tab'
2555
81127aa5 2556 _TESTS = [{
da692b79 2557 'note': 'playlists, multipage',
8bdd16b4 2558 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2559 'playlist_mincount': 94,
2560 'info_dict': {
2561 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2562 'title': 'Игорь Клейнер - Playlists',
2563 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2564 'uploader': 'Игорь Клейнер',
2565 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2566 },
2567 }, {
da692b79 2568 'note': 'playlists, multipage, different order',
8bdd16b4 2569 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2570 'playlist_mincount': 94,
2571 'info_dict': {
2572 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2573 'title': 'Игорь Клейнер - Playlists',
2574 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2575 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2576 'uploader': 'Игорь Клейнер',
8bdd16b4 2577 },
201c1459 2578 }, {
da692b79 2579 'note': 'playlists, series',
201c1459 2580 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2581 'playlist_mincount': 5,
2582 'info_dict': {
2583 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2584 'title': '3Blue1Brown - Playlists',
2585 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 2586 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2587 'uploader': '3Blue1Brown',
201c1459 2588 },
8bdd16b4 2589 }, {
da692b79 2590 'note': 'playlists, singlepage',
8bdd16b4 2591 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2592 'playlist_mincount': 4,
2593 'info_dict': {
2594 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2595 'title': 'ThirstForScience - Playlists',
2596 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2597 'uploader': 'ThirstForScience',
2598 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2599 }
2600 }, {
2601 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2602 'only_matching': True,
2603 }, {
da692b79 2604 'note': 'basic, single video playlist',
0e30a7b9 2605 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2606 'info_dict': {
0e30a7b9 2607 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2608 'uploader': 'Sergey M.',
2609 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2610 'title': 'youtube-dl public playlist',
81127aa5 2611 },
0e30a7b9 2612 'playlist_count': 1,
9291475f 2613 }, {
da692b79 2614 'note': 'empty playlist',
0e30a7b9 2615 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2616 'info_dict': {
0e30a7b9 2617 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2618 'uploader': 'Sergey M.',
2619 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2620 'title': 'youtube-dl empty playlist',
9291475f
PH
2621 },
2622 'playlist_count': 0,
2623 }, {
da692b79 2624 'note': 'Home tab',
8bdd16b4 2625 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2626 'info_dict': {
8bdd16b4 2627 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2628 'title': 'lex will - Home',
2629 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2630 'uploader': 'lex will',
2631 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2632 },
8bdd16b4 2633 'playlist_mincount': 2,
9291475f 2634 }, {
da692b79 2635 'note': 'Videos tab',
8bdd16b4 2636 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2637 'info_dict': {
8bdd16b4 2638 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2639 'title': 'lex will - Videos',
2640 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2641 'uploader': 'lex will',
2642 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2643 },
8bdd16b4 2644 'playlist_mincount': 975,
9291475f 2645 }, {
da692b79 2646 'note': 'Videos tab, sorted by popular',
8bdd16b4 2647 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2648 'info_dict': {
8bdd16b4 2649 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2650 'title': 'lex will - Videos',
2651 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2652 'uploader': 'lex will',
2653 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2654 },
8bdd16b4 2655 'playlist_mincount': 199,
9291475f 2656 }, {
da692b79 2657 'note': 'Playlists tab',
8bdd16b4 2658 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2659 'info_dict': {
8bdd16b4 2660 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2661 'title': 'lex will - Playlists',
2662 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2663 'uploader': 'lex will',
2664 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2665 },
8bdd16b4 2666 'playlist_mincount': 17,
ac7553d0 2667 }, {
da692b79 2668 'note': 'Community tab',
8bdd16b4 2669 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2670 'info_dict': {
8bdd16b4 2671 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2672 'title': 'lex will - Community',
2673 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2674 'uploader': 'lex will',
2675 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2676 },
2677 'playlist_mincount': 18,
87dadd45 2678 }, {
da692b79 2679 'note': 'Channels tab',
8bdd16b4 2680 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2681 'info_dict': {
8bdd16b4 2682 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2683 'title': 'lex will - Channels',
2684 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2685 'uploader': 'lex will',
2686 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2687 },
deaec5af 2688 'playlist_mincount': 12,
cd684175 2689 }, {
2690 'note': 'Search tab',
2691 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
2692 'playlist_mincount': 40,
2693 'info_dict': {
2694 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2695 'title': '3Blue1Brown - Search - linear algebra',
2696 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2697 'uploader': '3Blue1Brown',
2698 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2699 },
6b08cdf6 2700 }, {
a0566bbf 2701 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2702 'only_matching': True,
2703 }, {
a0566bbf 2704 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2705 'only_matching': True,
2706 }, {
a0566bbf 2707 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2708 'only_matching': True,
2709 }, {
2710 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2711 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2712 'info_dict': {
2713 'title': '29C3: Not my department',
2714 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2715 'uploader': 'Christiaan008',
2716 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2717 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2718 },
2719 'playlist_count': 96,
2720 }, {
2721 'note': 'Large playlist',
2722 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2723 'info_dict': {
8bdd16b4 2724 'title': 'Uploads from Cauchemar',
2725 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2726 'uploader': 'Cauchemar',
2727 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2728 },
8bdd16b4 2729 'playlist_mincount': 1123,
2730 }, {
da692b79 2731 'note': 'even larger playlist, 8832 videos',
8bdd16b4 2732 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2733 'only_matching': True,
4b7df0d3
JMF
2734 }, {
2735 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2736 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2737 'info_dict': {
acf757f4
PH
2738 'title': 'Uploads from Interstellar Movie',
2739 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2740 'uploader': 'Interstellar Movie',
8bdd16b4 2741 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2742 },
481cc733 2743 'playlist_mincount': 21,
358de58c 2744 }, {
2745 'note': 'Playlist with "show unavailable videos" button',
2746 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2747 'info_dict': {
2748 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2749 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2750 'uploader': 'Phim Siêu Nhân Nhật Bản',
2751 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2752 },
da692b79 2753 'playlist_mincount': 200,
5d342002 2754 }, {
da692b79 2755 'note': 'Playlist with unavailable videos in page 7',
5d342002 2756 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2757 'info_dict': {
2758 'title': 'Uploads from BlankTV',
2759 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2760 'uploader': 'BlankTV',
2761 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2762 },
da692b79 2763 'playlist_mincount': 1000,
8bdd16b4 2764 }, {
da692b79 2765 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 2766 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2767 'info_dict': {
2768 'title': 'Data Analysis with Dr Mike Pound',
2769 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2770 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2771 'uploader': 'Computerphile',
deaec5af 2772 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2773 },
2774 'playlist_mincount': 11,
2775 }, {
a0566bbf 2776 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2777 'only_matching': True,
dacb3a86 2778 }, {
da692b79 2779 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
2780 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2781 'info_dict': {
2782 'id': 'FqZTN594JQw',
2783 'ext': 'webm',
2784 'title': "Smiley's People 01 detective, Adventure Series, Action",
2785 'uploader': 'STREEM',
2786 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2787 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2788 'upload_date': '20150526',
2789 'license': 'Standard YouTube License',
2790 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2791 'categories': ['People & Blogs'],
2792 'tags': list,
dbdaaa23 2793 'view_count': int,
dacb3a86
S
2794 'like_count': int,
2795 'dislike_count': int,
2796 },
2797 'params': {
2798 'skip_download': True,
2799 },
13a75688 2800 'skip': 'This video is not available.',
dacb3a86 2801 'add_ie': [YoutubeIE.ie_key()],
481cc733 2802 }, {
8bdd16b4 2803 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2804 'only_matching': True,
66b48727 2805 }, {
8bdd16b4 2806 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2807 'only_matching': True,
a0566bbf 2808 }, {
2809 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2810 'info_dict': {
da692b79 2811 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 2812 'ext': 'mp4',
deaec5af 2813 'title': compat_str,
a0566bbf 2814 'uploader': 'Sky News',
2815 'uploader_id': 'skynews',
2816 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 2817 'upload_date': r're:\d{8}',
2818 'description': compat_str,
a0566bbf 2819 'categories': ['News & Politics'],
2820 'tags': list,
2821 'like_count': int,
2822 'dislike_count': int,
2823 },
2824 'params': {
2825 'skip_download': True,
2826 },
da692b79 2827 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 2828 }, {
2829 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2830 'info_dict': {
2831 'id': 'a48o2S1cPoo',
2832 'ext': 'mp4',
2833 'title': 'The Young Turks - Live Main Show',
2834 'uploader': 'The Young Turks',
2835 'uploader_id': 'TheYoungTurks',
2836 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2837 'upload_date': '20150715',
2838 'license': 'Standard YouTube License',
2839 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2840 'categories': ['News & Politics'],
2841 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2842 'like_count': int,
2843 'dislike_count': int,
2844 },
2845 'params': {
2846 'skip_download': True,
2847 },
2848 'only_matching': True,
2849 }, {
2850 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2851 'only_matching': True,
2852 }, {
2853 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2854 'only_matching': True,
3d3dddc9 2855 }, {
2856 'url': 'https://www.youtube.com/feed/trending',
2857 'only_matching': True,
2858 }, {
3d3dddc9 2859 'url': 'https://www.youtube.com/feed/library',
2860 'only_matching': True,
2861 }, {
3d3dddc9 2862 'url': 'https://www.youtube.com/feed/history',
2863 'only_matching': True,
2864 }, {
3d3dddc9 2865 'url': 'https://www.youtube.com/feed/subscriptions',
2866 'only_matching': True,
2867 }, {
3d3dddc9 2868 'url': 'https://www.youtube.com/feed/watch_later',
2869 'only_matching': True,
2870 }, {
da692b79 2871 'note': 'Recommended - redirects to home page',
3d3dddc9 2872 'url': 'https://www.youtube.com/feed/recommended',
2873 'only_matching': True,
29f7c58a 2874 }, {
da692b79 2875 'note': 'inline playlist with not always working continuations',
29f7c58a 2876 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2877 'only_matching': True,
2878 }, {
2879 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2880 'only_matching': True,
2881 }, {
2882 'url': 'https://www.youtube.com/course',
2883 'only_matching': True,
2884 }, {
2885 'url': 'https://www.youtube.com/zsecurity',
2886 'only_matching': True,
2887 }, {
2888 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2889 'only_matching': True,
2890 }, {
2891 'url': 'https://www.youtube.com/TheYoungTurks/live',
2892 'only_matching': True,
39ed931e 2893 }, {
2894 'url': 'https://www.youtube.com/hashtag/cctv9',
2895 'info_dict': {
2896 'id': 'cctv9',
2897 'title': '#cctv9',
2898 },
2899 'playlist_mincount': 350,
201c1459 2900 }, {
2901 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2902 'only_matching': True,
9297939e 2903 }, {
da692b79 2904 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 2905 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2906 'only_matching': True
fe03a6cd 2907 }, {
2908 'note': '/browse/ should redirect to /channel/',
2909 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
2910 'only_matching': True
2911 }, {
2912 'note': 'VLPL, should redirect to playlist?list=PL...',
2913 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2914 'info_dict': {
2915 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2916 'uploader': 'NoCopyrightSounds',
2917 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
2918 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
2919 'title': 'NCS Releases',
2920 },
2921 'playlist_mincount': 166,
18db7548 2922 }, {
2923 'note': 'Topic, should redirect to playlist?list=UU...',
2924 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
2925 'info_dict': {
2926 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
2927 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
2928 'title': 'Uploads from Royalty Free Music - Topic',
2929 'uploader': 'Royalty Free Music - Topic',
2930 },
2931 'expected_warnings': [
2932 'A channel/user page was given',
2933 'The URL does not have a videos tab',
2934 ],
2935 'playlist_mincount': 101,
2936 }, {
2937 'note': 'Topic without a UU playlist',
2938 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
2939 'info_dict': {
2940 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
2941 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
2942 },
2943 'expected_warnings': [
2944 'A channel/user page was given',
2945 'The URL does not have a videos tab',
2946 'Falling back to channel URL',
2947 ],
2948 'playlist_mincount': 9,
abcdd12b 2949 }, {
2950 'note': 'Youtube music Album',
2951 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
2952 'info_dict': {
2953 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
2954 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
2955 },
2956 'playlist_count': 50,
29f7c58a 2957 }]
2958
2959 @classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    Any URL that YoutubeIE reports as suitable is rejected here; every
    other URL is decided by the parent class implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2963
2964 def _extract_channel_id(self, webpage):
2965 channel_id = self._html_search_meta(
2966 'channelId', webpage, 'channel id', default=None)
2967 if channel_id:
2968 return channel_id
2969 channel_url = self._html_search_meta(
2970 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2971 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2972 'twitter:app:url:googleplay'), webpage, 'channel url')
2973 return self._search_regex(
2974 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2975 channel_url, 'channel id')
15f6397c 2976
8bdd16b4 2977 @staticmethod
cd7c66cf 2978 def _extract_basic_item_renderer(item):
2979 # Modified from _extract_grid_item_renderer
201c1459 2980 known_basic_renderers = (
2981 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 2982 )
2983 for key, renderer in item.items():
201c1459 2984 if not isinstance(renderer, dict):
cd7c66cf 2985 continue
201c1459 2986 elif key in known_basic_renderers:
2987 return renderer
2988 elif key.startswith('grid') and key.endswith('Renderer'):
2989 return renderer
8bdd16b4 2990
def _grid_entries(self, grid_renderer):
    """Yield one entry per usable item of *grid_renderer*.

    Each element of ``grid_renderer['items']`` is reduced to its renderer
    and then mapped to the first matching kind: playlist (``playlistId``),
    video (``videoId``), channel (``channelId``), or a generic result built
    from the renderer's navigation endpoint URL. Items matching none of
    these are skipped.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        # The title may be given either as runs or as simpleText
        title = try_get(
            renderer, (lambda x: x['title']['runs'][0]['text'],
                       lambda x: x['title']['simpleText']), compat_str)
        # playlist
        playlist_id = renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
            continue
        # video
        video_id = renderer.get('videoId')
        if video_id:
            yield self._extract_video(renderer)
            continue
        # channel
        channel_id = renderer.get('channelId')
        if channel_id:
            # Reuse the title found above. Looking it up again through
            # simpleText alone dropped titles that are only given as runs.
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
            continue
        # generic endpoint URL support
        ep_url = urljoin('https://www.youtube.com/', try_get(
            renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str))
        if ep_url:
            # Hand the URL to the first extractor that accepts it
            for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if ie.suitable(ep_url):
                    yield self.url_result(
                        ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                    break
8bdd16b4 3033
3d3dddc9 3034 def _shelf_entries_from_content(self, shelf_renderer):
3035 content = shelf_renderer.get('content')
3036 if not isinstance(content, dict):
8bdd16b4 3037 return
cd7c66cf 3038 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3039 if renderer:
3040 # TODO: add support for nested playlists so each shelf is processed
3041 # as separate playlist
3042 # TODO: this includes only first N items
3043 for entry in self._grid_entries(renderer):
3044 yield entry
3045 renderer = content.get('horizontalListRenderer')
3046 if renderer:
3047 # TODO
3048 pass
8bdd16b4 3049
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf.

    When the shelf has an endpoint URL, a URL result for it (titled with
    the shelf's first title run) is yielded first. With *skip_channels*
    set, a shelf whose URL contains ``/channels?`` yields nothing at all.
    Entries taken from the shelf content are yielded afterwards, whether
    or not the shelf had a URL.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are recognised by their URL; testing
        # endpoint.commandMetadata.webCommandMetadata.webPageType against
        # WEB_PAGE_TYPE_CHANNEL does not work for this.
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # A shelf may come without a URL, so its content is always examined too
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 3067
8bdd16b4 3068 def _playlist_entries(self, video_list_renderer):
3069 for content in video_list_renderer['contents']:
3070 if not isinstance(content, dict):
3071 continue
3072 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3073 if not isinstance(renderer, dict):
3074 continue
3075 video_id = renderer.get('videoId')
3076 if not video_id:
3077 continue
3078 yield self._extract_video(renderer)
07aeced6 3079
def _rich_entries(self, rich_grid_renderer):
    """Yield the video found under ``content.videoRenderer``, if any.

    Nothing is yielded when that renderer is missing or has no video id.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3087
8bdd16b4 3088 def _video_entry(self, video_renderer):
3089 video_id = video_renderer.get('videoId')
3090 if video_id:
3091 return self._extract_video(video_renderer)
dacb3a86 3092
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a backstage post.

    In order: the attached video, the attached playlist, and every link in
    the post text that YoutubeIE accepts. A text link pointing at the
    attached video is not yielded a second time.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # attached video
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # attached playlist
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # video links inside the post text
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        if attached_video_id == linked_video_id:
            # same video as the attachment handled above
            continue
        yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 3128
8bdd16b4 3129 def _post_thread_continuation_entries(self, post_thread_continuation):
3130 contents = post_thread_continuation.get('contents')
3131 if not isinstance(contents, list):
3132 return
3133 for content in contents:
3134 renderer = content.get('backstagePostThreadRenderer')
3135 if not isinstance(renderer, dict):
3136 continue
3137 for entry in self._post_thread_entries(renderer):
3138 yield entry
07aeced6 3139
39ed931e 3140 r''' # unused
3141 def _rich_grid_entries(self, contents):
3142 for content in contents:
3143 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3144 if video_renderer:
3145 entry = self._video_entry(video_renderer)
3146 if entry:
3147 yield entry
3148 '''
3149
29f7c58a 3150 @staticmethod
3151 def _build_continuation_query(continuation, ctp=None):
3152 query = {
3153 'ctoken': continuation,
3154 'continuation': continuation,
3155 }
3156 if ctp:
3157 query['itct'] = ctp
3158 return query
3159
@staticmethod
def _extract_next_continuation_data(renderer):
    """Build a continuation query from ``continuations[0].nextContinuationData``.

    Returns None when that data, or its ``continuation`` token, is missing.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = data.get('continuation') if data else None
    if token:
        return YoutubeTabIE._build_continuation_query(
            token, data.get('clickTrackingParams'))
c5e8d7af 3171
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for ``renderer``, or None.

    ``nextContinuationData`` is tried first. Otherwise the renderer's
    ``contents`` and ``items`` lists are scanned for the first
    ``continuationItemRenderer`` that carries a continuation token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    candidates = []
    for key in ('contents', 'items'):
        candidates += try_get(renderer, lambda x: x[key], list) or []

    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        # try_get returns None for a missing/empty endpoint, so no separate check is needed
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
448830ce 3194
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield all entries of a tab, following continuations page by page.

    The entries embedded in ``tab['content']`` are yielded first. While a
    continuation is available, further pages are requested through
    ``_extract_response`` and dispatched to the matching entry extractor.
    Iteration stops when there is no continuation, no response, or a
    response contains no recognised renderer.
    """

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list shared with extract_entries(); rebinding the name
    # below resets it for each page
    continuation_list = [None]  # Python 2 doesnot support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # 'itct' is only present when the continuation carried click-tracking
        # params, so do not index it unconditionally
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep using the previous visitor data when the response has none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Maps the first continuation item's renderer key to
        # (entry extractor, key under which the item list is wrapped)
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3314
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose ``selected`` flag is True.

    Both ``tabRenderer`` and ``expandableTabRenderer`` are considered.
    Raises ExtractorError when no tab is marked as selected.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3323
@staticmethod
def _extract_uploader(data):
    """Return uploader fields found in the playlist sidebar.

    The result may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``; keys whose value is None are left out.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        info['uploader'] = owner.get('text')
        info['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        base_url = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        info['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return dict((key, value) for key, value in info.items() if value is not None)
8bdd16b4 3346
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tabbed (channel/playlist) page.

    Metadata is read from ``channelMetadataRenderer`` when present, else
    from ``playlistMetadataRenderer``. Uploader details come from the
    sidebar when no channel id was found. Entries are produced lazily by
    ``_entries`` for the selected tab.
    """
    channel_name = channel_url = channel_id = None
    playlist_id = title = description = None
    raw_thumbnails, tags = [], []

    selected_tab = self._extract_selected_tab(tabs)

    channel_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if channel_renderer:
        channel_name = channel_renderer.get('title')
        channel_url = channel_renderer.get('channelUrl')
        channel_id = channel_renderer.get('externalId')
    # Fall back to playlist metadata when there is no channel metadata
    renderer = channel_renderer or try_get(
        data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only thumbnails that have a usable URL
    thumbnails = []
    for raw in raw_thumbnails:
        if not isinstance(raw, dict):
            continue
        thumb_url = url_or_none(raw.get('url'))
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(raw.get('width')),
                'height': int_or_none(raw.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    # Append the selected tab's labels to the title
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # channel* fields mirror the final uploader* fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id,
        self._extract_identity_token(webpage, item_id),
        self._extract_account_syncid(data),
        self._extract_ytcfg(item_id, webpage))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3419
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages as needed.

    Each page is fetched from the ``next`` endpoint using the watch endpoint
    of the last video on the current page. Iteration ends when a page has no
    videos, adds nothing after the last yielded video, contains the very
    first video again, or when the response carries no playlist.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item may lack a watch endpoint; fall back to an empty dict
        # so that the defaults below are used instead of failing on ``.get``
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Nothing more to iterate; do not hand None to _playlist_entries
            return
cd7c66cf 3458
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Return the result for a playlist shown next to a watch page.

    When the playlist exposes its own URL and it differs from ``url``, the
    extraction is delegated to YoutubeTabIE through a url_result. Otherwise
    the playlist is extracted as a mix.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    if playlist_url and playlist_url != url:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=title)

    mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
    return self.playlist_result(
        mix_entries, playlist_id=playlist_id, playlist_title=title)
c5e8d7af 3476
@staticmethod
def _extract_alerts(data):
    """Yield ``(alert_type, message)`` for every alert in ``data['alerts']``.

    The message is the alert's ``simpleText`` when present, otherwise the
    concatenation of the texts of its ``runs``. Alerts without a type or
    without any text are skipped. Each alert is yielded at most once.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if not message:
                runs = try_get(alert, lambda x: x['text']['runs'], list) or []
                # A run without a usable text contributes nothing, rather
                # than breaking the concatenation with None
                message = ''.join(
                    try_get(run, lambda x: x['text'], compat_str) or ''
                    for run in runs)
            if message:
                yield alert_type, message
3493
3494 def _report_alerts(self, alerts, expected=True):
3ffc7c89 3495 errors = []
3496 warnings = []
95c01b6c 3497 for alert_type, alert_message in alerts:
f3eaa8dd 3498 if alert_type.lower() == 'error':
3ffc7c89 3499 errors.append([alert_type, alert_message])
f3eaa8dd 3500 else:
3ffc7c89 3501 warnings.append([alert_type, alert_message])
f3eaa8dd 3502
3ffc7c89 3503 for alert_type, alert_message in (warnings + errors[:-1]):
6a39ee13 3504 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
3ffc7c89 3505 if errors:
3506 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3507
95c01b6c 3508 def _extract_and_report_alerts(self, data, *args, **kwargs):
3509 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
3510
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when the page has no playlist sidebar. Otherwise a
    non-fatal API request is made using the button's browse endpoint, with
    default ``params``/``browseId`` values where the button did not supply them.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return

    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        primary_info = sidebar_item.get('playlistSidebarPrimaryInfoRenderer')
        menu_items = try_get(
            primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if not isinstance(menu_item, dict):
                continue
            nav_item = menu_item.get('menuNavigationItemRenderer')
            label = try_get(nav_item, lambda x: x['text']['simpleText'], compat_str)
            if label and label.lower() == 'show unavailable videos':
                endpoint = try_get(
                    nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
                browse_id = endpoint.get('browseId')
                params = endpoint.get('params')
                # Only leaves the menu loop; remaining sidebar items are still scanned
                break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3554
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint ``ep`` with retries and return the response.

    The request is retried, up to the ``extractor_retries`` param (default
    3), on HTTP 500/503/404 errors and when the response contains none of
    ``check_get_keys``. When ``fatal`` is true, failures are raised as
    ExtractorError; otherwise they are reported as warnings and None is
    returned. This includes error alerts found in the response.
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if count < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return
        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as e:
                # Honour fatal=False for error alerts too, like the other failure paths
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                return
    return response
3602
def _extract_webpage(self, url, item_id):
    """Download ``url`` and return ``(webpage, data)``.

    ``data`` is the page's yt initial data. The download is repeated, up to
    the ``extractor_retries`` param (default 3), while the data has neither
    ``contents`` nor ``currentVideoEndpoint``. Alerts in incomplete data are
    reported; after the last retry an ExtractorError is raised.
    """
    max_retries = self.get_param('extractor_retries', 3)
    incomplete_msg = 'Incomplete yt initial data recieved'
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if attempt:
            self.report_warning('%s. Retrying ...' % incomplete_msg)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        webpage = self._download_webpage(
            url, item_id, 'Downloading webpage%s' % retry_suffix)
        data = self._extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if attempt >= max_retries:
            raise ExtractorError(incomplete_msg)
    return webpage, data
3624
9297939e 3625 @staticmethod
3626 def _smuggle_data(entries, data):
3627 for entry in entries:
3628 if data:
3629 entry['url'] = smuggle_url(entry['url'], data)
3630 yield entry
3631
def _real_extract(self, url):
    """Extract ``url``, carrying smuggled data over to the resulting entries.

    Music URLs are flagged with ``is_music_url`` in the smuggled data before
    the actual extraction runs.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    info_dict = self.__real_extract(url, smuggled_data)
    entries = info_dict.get('entries')
    if entries:
        info_dict['entries'] = self._smuggle_data(entries, smuggled_data)
    return info_dict
3640
# Splits a URL into the part matched by _VALID_URL ('pre'), an optional
# '/tab' component (only when 'channel_type' matched) and the rest ('post')
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)

def __real_extract(self, url, smuggled_data):
    """Extract a channel tab, playlist or watch-with-playlist URL.

    The URL is normalised to www.youtube.com, music channel URLs are mapped
    to their playlist/channel equivalents and bare channel URLs are
    redirected to the videos tab (unless disabled via compat options). The
    page is then downloaded and handled as a tabbed page, a watch-page
    playlist, or a single video, in that order.

    Raises ExtractorError when the page cannot be recognised.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Unmatched groups become '' so they can be concatenated and tested directly
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if (mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]
                and 'no-youtube-channel-redirect' not in compat_opts):
            if not mobj['not_channel'] and item_id[:2] == 'UC':
                # Topic channels don't have /videos. Use the equivalent playlist instead
                self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                pl_id = 'UU%s' % item_id[2:]
                pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                try:
                    pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                    for alert_type, alert_message in self._extract_alerts(pl_data):
                        # Compare case-insensitively, as _report_alerts does
                        if alert_type.lower() == 'error':
                            raise ExtractorError('Youtube said: %s' % alert_message)
                    item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                except ExtractorError:
                    self.report_warning('The playlist gave error. Falling back to channel URL')
            else:
                self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3754
c5e8d7af 3755
8bdd16b4 3756class YoutubePlaylistIE(InfoExtractor):
3757 IE_DESC = 'YouTube.com playlists'
3758 _VALID_URL = r'''(?x)(?:
3759 (?:https?://)?
3760 (?:\w+\.)?
3761 (?:
3762 (?:
3763 youtube(?:kids)?\.com|
29f7c58a 3764 invidio\.us
8bdd16b4 3765 )
3766 /.*?\?.*?\blist=
3767 )?
3768 (?P<id>%(playlist_id)s)
3769 )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
3770 IE_NAME = 'youtube:playlist'
cdc628a4 3771 _TESTS = [{
8bdd16b4 3772 'note': 'issue #673',
3773 'url': 'PLBB231211A4F62143',
cdc628a4 3774 'info_dict': {
8bdd16b4 3775 'title': '[OLD]Team Fortress 2 (Class-based LP)',
3776 'id': 'PLBB231211A4F62143',
3777 'uploader': 'Wickydoo',
3778 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
3779 },
3780 'playlist_mincount': 29,
3781 }, {
3782 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
3783 'info_dict': {
3784 'title': 'YDL_safe_search',
3785 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
3786 },
3787 'playlist_count': 2,
3788 'skip': 'This playlist is private',
9558dcec 3789 }, {
8bdd16b4 3790 'note': 'embedded',
3791 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
3792 'playlist_count': 4,
9558dcec 3793 'info_dict': {
8bdd16b4 3794 'title': 'JODA15',
3795 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
3796 'uploader': 'milan',
3797 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
9558dcec 3798 }
cdc628a4 3799 }, {
8bdd16b4 3800 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
3801 'playlist_mincount': 982,
3802 'info_dict': {
3803 'title': '2018 Chinese New Singles (11/6 updated)',
3804 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
3805 'uploader': 'LBK',
3806 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
3807 }
daa0df9e 3808 }, {
29f7c58a 3809 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
3810 'only_matching': True,
3811 }, {
3812 # music album playlist
3813 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
3814 'only_matching': True,
3815 }]
3816
3817 @classmethod
3818 def suitable(cls, url):
201c1459 3819 if YoutubeTabIE.suitable(url):
3820 return False
1bdae7d3 3821 # Hack for lazy extractors until more generic solution is implemented
3822 # (see #28780)
3823 from .youtube import parse_qs
201c1459 3824 qs = parse_qs(url)
3825 if qs.get('v', [None])[0]:
3826 return False
3827 return super(YoutubePlaylistIE, cls).suitable(url)
29f7c58a 3828
3829 def _real_extract(self, url):
3830 playlist_id = self._match_id(url)
9297939e 3831 is_music_url = self.is_music_url(url)
3832 url = update_url_query(
3833 'https://www.youtube.com/playlist',
3834 parse_qs(url) or {'list': playlist_id})
3835 if is_music_url:
3836 url = smuggle_url(url, {'is_music_url': True})
3837 return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 3838
3839
class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be links that carry a ``list`` parameter into watch URLs for YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3878
3879
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` keyword to the user's channel URL."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /user/ URL of the matched id."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3893
b05654f0 3894
3d3dddc9 3895class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
70d5c17b 3896 IE_NAME = 'youtube:favorites'
3897 IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
3898 _VALID_URL = r':ytfav(?:ou?rite)?s?'
3899 _LOGIN_REQUIRED = True
3900 _TESTS = [{
3901 'url': ':ytfav',
3902 'only_matching': True,
3903 }, {
3904 'url': ':ytfavorites',
3905 'only_matching': True,
3906 }]
3907
3908 def _real_extract(self, url):
3909 return self.url_result(
3910 'https://www.youtube.com/playlist?list=LL',
3911 ie=YoutubeTabIE.ie_key())
3912
3913
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor behind the "ytsearch" keyword.

    Pages through the 'search' endpoint and yields the video results of
    each page until the requested number of results has been produced or
    no continuation token is left.
    """
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional value sent as 'params' with every search request
    # (subclasses override it, e.g. to change the ordering).
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` video results for ``query``.

        The same request payload is reused for every page; after each page
        the continuation token found in the response is added to it.
        ``n`` may be ``float('inf')`` for "no limit".
        """
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # Results are looked up in two layouts: nested under 'contents',
            # or delivered through 'onResponseReceivedCommands'
            # (presumably first page vs. continuation responses).
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for slr_content in slr_contents:
                if continuation_token is None:
                    continuation_token = try_get(
                        slr_content,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    # '>=' rather than '==' so that a limit which is never
                    # hit exactly cannot lead to endless paging.
                    if total >= n:
                        return

            if not continuation_token:
                break
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        # The query doubles as playlist id and title, so the resulting
        # playlist carries the title the search URL test expects.
        return self.playlist_result(
            self._entries(query, n), playlist_id=query, playlist_title=query)
75dff0ee 3983
c9ae7b95 3984
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE bound to the "ytsearchdate" keyword."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' value of every search request made by the parent.
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3990
c9ae7b95 3991
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search described by a youtube.com/results URL."""

    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Use the URL pattern above as-is instead of a keyword-derived one.
        return cls._VALID_URL

    def _real_extract(self, url):
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        # 'search_query' takes precedence over 'q' when both are present.
        terms = params.get('search_query') or params.get('q')
        search_query = terms[0]
        # Forward the URL's 'sp' value (empty string when absent) as the
        # search params used by the inherited paging code.
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(search_query, self._MAX_RESULTS)
3462ffa8 4017
4018
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derived from the feed name so subclasses need not spell it out.
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        # The matched URL is irrelevant; the feed name alone picks the page.
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4035
4036
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` keyword onto the "WL" playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The keyword carries no data; the target is always the same.
        watch_later_playlist = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_playlist, ie=YoutubeTabIE.ie_key())
3462ffa8 4049
4050
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed served for the bare youtube.com front page and the ":ytrec" keywords.
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4065
1ed5b5c9 4066
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed selected by the ":ytsub" keyword family.
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4078
1ed5b5c9 4079
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed selected by the ":ythis" / ":ythistory" keywords.
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4088
4089
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their video ID and explain why.

    Matches watch/attribution URLs that end right after a single
    non-video parameter (or with no parameter at all), which is what
    remains when an unquoted ``&`` is interpreted by the shell.
    Extraction always fails with an explanatory, expected error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with quoting advice."""
        # The suggested command names this project's executable (yt-dlp),
        # so the advice can be copied verbatim.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4137
4138
class YoutubeTruncatedIDIE(InfoExtractor):
    """Reject watch URLs whose video ID is only 1 to 10 characters long."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Nothing can be extracted; report the short ID as an expected error.
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)