]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
Fix id sanitization in filenames
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Return the query component of *url* parsed into a dict of value lists."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed (including the currently unsupported
        username+password login).

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """

        def warn(message):
            self.report_warning(message)

        # username+password login is broken
        if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')
        username, password = self._get_login_info()
        if username:
            warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
            return False
        # Everything below this is broken!

        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # TODO: remind users with a cookiefile to keep their cookies up to date
            # (too many people use outdated cookies and open issues)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST the hidden login-form fields plus the JSON-encoded request payload
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything preceding the first '[' of the response body
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        sign_in_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'
        # Client-state list sent identically with the lookup and challenge requests
        client_state = [
            None, None,
            [2, 1, None, 1, sign_in_url, None, [], 4],
            1, [None, None, []], None, None, None, True
        ]

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            client_state,
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            client_state,
        ]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised so the prefix is kept for every failure message
            warn('Unable to login: %s' % (
                'Invalid password' if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised so the prefix is kept for every failure message
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code' if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _initialize_consent(self):
        """Set a 'YES' CONSENT cookie for .youtube.com unless one is already accepted.

        Nothing is done when a __Secure-3PSID cookie exists or the CONSENT
        cookie already contains 'YES'.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            # Reuse the numeric id of a pending consent cookie when there is one
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Prepare the consent cookie and, when a downloader is attached, attempt login."""
        self._initialize_consent()
        if self._downloader is None:
            return
        # The result is intentionally ignored: a failed login only emits warnings
        self._login()

    _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
    _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """Return a 'SAPISIDHASH <time>_<sha1>' header value, or None without a SAPISID cookie."""
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        yt_cookies = self._get_cookies('https://www.youtube.com')
        sapisid_cookie = dict_get(
            yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        # SAPISID cookie is required if not already present
        if not yt_cookies.get('SAPISID'):
            self._set_cookie(
                '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {sapisid_cookie.value} https://www.youtube.com'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None):
        """POST *query* (merged with the client context) as JSON to youtubei/v1/<ep>.

        *headers* are applied on top of the generated API headers; *context* and
        *api_key* fall back to _extract_context() and _extract_api_key().
        """
        data = {'context': context or self._extract_context()}
        data.update(query)
        real_headers = self._generate_api_headers()
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def _extract_api_key(self, ytcfg=None):
        """Return INNERTUBE_API_KEY from *ytcfg*, or the built-in default key."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY

    def _extract_yt_initial_data(self, video_id, webpage):
        """Locate the ytInitialData JSON object in *webpage* and parse it."""
        return self._parse_json(
            self._search_regex(
                # Try the pattern anchored by a trailing boundary first, then the bare one
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return ID_TOKEN from the page's ytcfg, else from a regex over *webpage*, else None."""
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(data):
        """
        Extract syncId required to download private playlists of secondary channels
        @param data Either response or ytcfg (None or non-dict values yield None)
        """
        sync_ids = (try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]
        # ytcfg includes channel_syncid if on secondary channel
        if isinstance(data, dict):
            return data.get('DELEGATED_SESSION_ID')
        # Nothing to look up when no (dict-like) data was supplied
        return None

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the first ytcfg.set({...}) object in *webpage*; return {} when absent or invalid."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def __extract_client_version(self, ytcfg):
        """Return INNERTUBE_CLIENT_VERSION from *ytcfg*, or the built-in default version."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION

    def _extract_context(self, ytcfg=None):
        """Return INNERTUBE_CONTEXT from *ytcfg*, or rebuild a minimal client context."""
        context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
        if context:
            return context

        # Recreate the client context (required)
        client_version = self.__extract_client_version(ytcfg)
        client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
        context = {
            'client': {
                'clientName': client_name,
                'clientVersion': client_version,
            }
        }
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
        """Build the request headers for API calls.

        Optional identity, account and visitor headers are added only for the
        arguments that are truthy; authorization headers are added when a
        SAPISIDHASH value can be generated.
        """
        headers = {
            'X-YouTube-Client-Name': '1',
            'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
            headers['X-Goog-AuthUser'] = 0
        if visitor_data:
            headers['x-goog-visitor-id'] = visitor_data
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = 'https://www.youtube.com'
        return headers

    @staticmethod
    def is_music_url(url):
        """Return True if *url* starts with http(s)://music.youtube.com/."""
        return re.match(r'https?://music\.youtube\.com/', url) is not None

    def _extract_video(self, renderer):
        """Convert a video renderer dict into a 'url'-type result handled by YoutubeIE."""
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is removed first; only a leading run of digits/commas is used
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
456
0c148415 457
360e1ca5 458class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 459 IE_DESC = 'YouTube.com'
bc2ca1bb 460 _INVIDIOUS_SITES = (
461 # invidious-redirect websites
462 r'(?:www\.)?redirect\.invidious\.io',
463 r'(?:(?:www|dev)\.)?invidio\.us',
464 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
465 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 466 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 467 r'(?:(?:www|au)\.)?ytprivate\.com',
468 r'(?:www\.)?invidious\.namazso\.eu',
469 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 470 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
471 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
472 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
473 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
474 # youtube-dl invidious instances list
475 r'(?:(?:www|no)\.)?invidiou\.sh',
476 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
477 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 478 r'(?:www\.)?invidious\.mastodon\.host',
479 r'(?:www\.)?invidious\.zapashcanon\.fr',
480 r'(?:www\.)?invidious\.kavin\.rocks',
201c1459 481 r'(?:www\.)?invidious\.tinfoil-hat\.net',
482 r'(?:www\.)?invidious\.himiko\.cloud',
483 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 484 r'(?:www\.)?invidious\.tube',
485 r'(?:www\.)?invidiou\.site',
486 r'(?:www\.)?invidious\.site',
487 r'(?:www\.)?invidious\.xyz',
488 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 489 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 490 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 491 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 492 r'(?:www\.)?tube\.poal\.co',
493 r'(?:www\.)?tube\.connect\.cafe',
494 r'(?:www\.)?vid\.wxzm\.sx',
495 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 496 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 497 r'(?:www\.)?yewtu\.be',
498 r'(?:www\.)?yt\.elukerio\.org',
499 r'(?:www\.)?yt\.lelux\.fi',
500 r'(?:www\.)?invidious\.ggc-project\.de',
501 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 502 r'(?:www\.)?ytprivate\.com',
503 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 504 r'(?:www\.)?invidious\.toot\.koeln',
505 r'(?:www\.)?invidious\.fdn\.fr',
506 r'(?:www\.)?watch\.nettohikari\.com',
507 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
508 r'(?:www\.)?qklhadlycap4cnod\.onion',
509 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
510 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
511 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
512 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
513 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
514 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
515 )
cb7dfeea 516 _VALID_URL = r"""(?x)^
c5e8d7af 517 (
edb53e2d 518 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 519 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
520 (?:www\.)?deturl\.com/www\.youtube\.com|
521 (?:www\.)?pwnyoutube\.com|
522 (?:www\.)?hooktube\.com|
523 (?:www\.)?yourepeat\.com|
524 tube\.majestyc\.net|
525 %(invidious)s|
526 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
527 (?:.*?\#/)? # handle anchor (#/) redirect urls
528 (?: # the various things that can precede the ID:
ac7553d0 529 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 530 |(?: # or the v= param in all its forms
f7000f3a 531 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 532 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 533 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
534 v=
535 )
f4b05232 536 ))
cbaed4bb
S
537 |(?:
538 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
539 vid\.plus| # or vid.plus/xxxx
540 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 541 %(invidious)s
cbaed4bb 542 )/
edb53e2d 543 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 544 )
c5e8d7af 545 )? # all until now is optional -> you can pass the naked ID
201c1459 546 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 547 (?(1).+)? # if we found the ID, everything can follow
9297939e 548 (?:\#|$)""" % {
bc2ca1bb 549 'invidious': '|'.join(_INVIDIOUS_SITES),
550 }
e40c758c 551 _PLAYER_INFO_RE = (
cc2db878 552 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
553 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 554 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 555 )
2c62dc26 556 _formats = {
c2d3cb4c 557 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
558 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
559 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
560 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
561 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
562 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
563 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
564 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 565 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 566 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
567 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
568 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
569 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
570 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
571 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 572 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 573 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
574 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 575
576
577 # 3D videos
c2d3cb4c 578 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
579 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
580 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
581 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 582 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
583 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
584 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 585
96fb5605 586 # Apple HTTP Live Streaming
11f12195 587 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 588 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
589 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
590 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
591 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
592 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 593 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
594 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
595
596 # DASH mp4 video
d23028a8
S
597 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
598 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
599 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
600 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
601 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 602 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
603 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
604 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
605 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
606 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
607 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
608 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 609
f6f1fc92 610 # Dash mp4 audio
d23028a8
S
611 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
612 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
613 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
614 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
615 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
616 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
617 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
618
619 # Dash webm
d23028a8
S
620 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
621 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
622 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
623 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
624 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
625 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
626 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
627 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
628 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
629 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
630 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
631 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
632 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
633 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
634 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 635 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
636 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
637 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
638 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
639 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
640 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
641 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
642
643 # Dash webm audio
d23028a8
S
644 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
645 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 646
0857baad 647 # Dash webm audio with opus inside
d23028a8
S
648 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
649 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
650 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 651
ce6b9a2d
PH
652 # RTMP (unnamed)
653 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
654
655 # av01 video only formats sometimes served with "unknown" codecs
656 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
657 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
658 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
659 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 660 }
29f7c58a 661 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 662
fd5c4aab
S
663 _GEO_BYPASS = False
664
78caa52a 665 IE_NAME = 'youtube'
2eb88d95
PH
666 _TESTS = [
667 {
2d3d2997 668 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
669 'info_dict': {
670 'id': 'BaW_jenozKc',
671 'ext': 'mp4',
3867038a 672 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
673 'uploader': 'Philipp Hagemeister',
674 'uploader_id': 'phihag',
ec85ded8 675 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
676 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
677 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 678 'upload_date': '20121002',
3867038a 679 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 680 'categories': ['Science & Technology'],
3867038a 681 'tags': ['youtube-dl'],
556dbe7f 682 'duration': 10,
dbdaaa23 683 'view_count': int,
3e7c1224
PH
684 'like_count': int,
685 'dislike_count': int,
7c80519c 686 'start_time': 1,
297a564b 687 'end_time': 9,
2eb88d95 688 }
0e853ca4 689 },
fccd3771 690 {
4bc3a23e
PH
691 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
692 'note': 'Embed-only video (#1746)',
693 'info_dict': {
694 'id': 'yZIXLfi8CZQ',
695 'ext': 'mp4',
696 'upload_date': '20120608',
697 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
698 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
699 'uploader': 'SET India',
94bfcd23 700 'uploader_id': 'setindia',
ec85ded8 701 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 702 'age_limit': 18,
545cc85d 703 },
704 'skip': 'Private video',
fccd3771 705 },
11b56058 706 {
8bdd16b4 707 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
708 'note': 'Use the first video ID in the URL',
709 'info_dict': {
710 'id': 'BaW_jenozKc',
711 'ext': 'mp4',
3867038a 712 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
713 'uploader': 'Philipp Hagemeister',
714 'uploader_id': 'phihag',
ec85ded8 715 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 716 'upload_date': '20121002',
3867038a 717 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 718 'categories': ['Science & Technology'],
3867038a 719 'tags': ['youtube-dl'],
556dbe7f 720 'duration': 10,
dbdaaa23 721 'view_count': int,
11b56058
PM
722 'like_count': int,
723 'dislike_count': int,
34a7de29
S
724 },
725 'params': {
726 'skip_download': True,
727 },
11b56058 728 },
dd27fd17 729 {
2d3d2997 730 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
731 'note': '256k DASH audio (format 141) via DASH manifest',
732 'info_dict': {
733 'id': 'a9LDPn-MO4I',
734 'ext': 'm4a',
735 'upload_date': '20121002',
736 'uploader_id': '8KVIDEO',
ec85ded8 737 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
738 'description': '',
739 'uploader': '8KVIDEO',
740 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 741 },
4bc3a23e
PH
742 'params': {
743 'youtube_include_dash_manifest': True,
744 'format': '141',
4919603f 745 },
de3c7fe0 746 'skip': 'format 141 not served anymore',
dd27fd17 747 },
8bdd16b4 748 # DASH manifest with encrypted signature
749 {
750 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
751 'info_dict': {
752 'id': 'IB3lcPjvWLA',
753 'ext': 'm4a',
754 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
755 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
756 'duration': 244,
757 'uploader': 'AfrojackVEVO',
758 'uploader_id': 'AfrojackVEVO',
759 'upload_date': '20131011',
cc2db878 760 'abr': 129.495,
8bdd16b4 761 },
762 'params': {
763 'youtube_include_dash_manifest': True,
764 'format': '141/bestaudio[ext=m4a]',
765 },
766 },
aa79ac0c
PH
767 # Controversy video
768 {
769 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
770 'info_dict': {
771 'id': 'T4XJQO3qol8',
772 'ext': 'mp4',
556dbe7f 773 'duration': 219,
aa79ac0c 774 'upload_date': '20100909',
4fe54c12 775 'uploader': 'Amazing Atheist',
aa79ac0c 776 'uploader_id': 'TheAmazingAtheist',
ec85ded8 777 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 778 'title': 'Burning Everyone\'s Koran',
545cc85d 779 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 780 }
c522adb1 781 },
dd2d55f1 782 # Normal age-gate video (embed allowed)
c522adb1 783 {
2d3d2997 784 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
785 'info_dict': {
786 'id': 'HtVdAasjOgU',
787 'ext': 'mp4',
788 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 789 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 790 'duration': 142,
c522adb1
JMF
791 'uploader': 'The Witcher',
792 'uploader_id': 'WitcherGame',
ec85ded8 793 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 794 'upload_date': '20140605',
34952f09 795 'age_limit': 18,
c522adb1
JMF
796 },
797 },
8bdd16b4 798 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
799 # YouTube Red ad is not captured for creator
800 {
801 'url': '__2ABJjxzNo',
802 'info_dict': {
803 'id': '__2ABJjxzNo',
804 'ext': 'mp4',
805 'duration': 266,
806 'upload_date': '20100430',
807 'uploader_id': 'deadmau5',
808 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 809 'creator': 'deadmau5',
810 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 811 'uploader': 'deadmau5',
812 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 813 'alt_title': 'Some Chords',
8bdd16b4 814 },
815 'expected_warnings': [
816 'DASH manifest missing',
817 ]
818 },
067aa17e 819 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
820 {
821 'url': 'lqQg6PlCWgI',
822 'info_dict': {
823 'id': 'lqQg6PlCWgI',
824 'ext': 'mp4',
556dbe7f 825 'duration': 6085,
90227264 826 'upload_date': '20150827',
cbe2bd91 827 'uploader_id': 'olympic',
ec85ded8 828 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 829 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 830 'uploader': 'Olympic',
cbe2bd91
PH
831 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
832 },
833 'params': {
834 'skip_download': 'requires avconv',
e52a40ab 835 }
cbe2bd91 836 },
6271f1ca
PH
837 # Non-square pixels
838 {
839 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
840 'info_dict': {
841 'id': '_b-2C3KPAM0',
842 'ext': 'mp4',
843 'stretched_ratio': 16 / 9.,
556dbe7f 844 'duration': 85,
6271f1ca
PH
845 'upload_date': '20110310',
846 'uploader_id': 'AllenMeow',
ec85ded8 847 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 848 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 849 'uploader': '孫ᄋᄅ',
6271f1ca
PH
850 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
851 },
06b491eb
S
852 },
853 # url_encoded_fmt_stream_map is empty string
854 {
855 'url': 'qEJwOuvDf7I',
856 'info_dict': {
857 'id': 'qEJwOuvDf7I',
f57b7835 858 'ext': 'webm',
06b491eb
S
859 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
860 'description': '',
861 'upload_date': '20150404',
862 'uploader_id': 'spbelect',
863 'uploader': 'Наблюдатели Петербурга',
864 },
865 'params': {
866 'skip_download': 'requires avconv',
e323cf3f
S
867 },
868 'skip': 'This live event has ended.',
06b491eb 869 },
067aa17e 870 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
871 {
872 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
873 'info_dict': {
874 'id': 'FIl7x6_3R5Y',
eb6793ba 875 'ext': 'webm',
da77d856
S
876 'title': 'md5:7b81415841e02ecd4313668cde88737a',
877 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 878 'duration': 220,
da77d856
S
879 'upload_date': '20150625',
880 'uploader_id': 'dorappi2000',
ec85ded8 881 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 882 'uploader': 'dorappi2000',
eb6793ba 883 'formats': 'mincount:31',
da77d856 884 },
eb6793ba 885 'skip': 'not actual anymore',
2ee8f5d8 886 },
8a1a26ce
YCH
887 # DASH manifest with segment_list
888 {
889 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
890 'md5': '8ce563a1d667b599d21064e982ab9e31',
891 'info_dict': {
892 'id': 'CsmdDsKjzN8',
893 'ext': 'mp4',
17ee98e1 894 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
895 'uploader': 'Airtek',
896 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
897 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
898 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
899 },
900 'params': {
901 'youtube_include_dash_manifest': True,
902 'format': '135', # bestvideo
be49068d
S
903 },
904 'skip': 'This live event has ended.',
2ee8f5d8 905 },
cf7e015f
S
906 {
907 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 908 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 909 'info_dict': {
545cc85d 910 'id': 'jvGDaLqkpTg',
911 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
912 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
913 },
914 'playlist': [{
915 'info_dict': {
545cc85d 916 'id': 'jvGDaLqkpTg',
cf7e015f 917 'ext': 'mp4',
545cc85d 918 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
919 'description': 'md5:e03b909557865076822aa169218d6a5d',
920 'duration': 10643,
921 'upload_date': '20161111',
922 'uploader': 'Team PGP',
923 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
924 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
925 },
926 }, {
927 'info_dict': {
545cc85d 928 'id': '3AKt1R1aDnw',
cf7e015f 929 'ext': 'mp4',
545cc85d 930 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
931 'description': 'md5:e03b909557865076822aa169218d6a5d',
932 'duration': 10991,
933 'upload_date': '20161111',
934 'uploader': 'Team PGP',
935 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
936 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
937 },
938 }, {
939 'info_dict': {
545cc85d 940 'id': 'RtAMM00gpVc',
cf7e015f 941 'ext': 'mp4',
545cc85d 942 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
943 'description': 'md5:e03b909557865076822aa169218d6a5d',
944 'duration': 10995,
945 'upload_date': '20161111',
946 'uploader': 'Team PGP',
947 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
948 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
949 },
950 }, {
951 'info_dict': {
545cc85d 952 'id': '6N2fdlP3C5U',
cf7e015f 953 'ext': 'mp4',
545cc85d 954 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
955 'description': 'md5:e03b909557865076822aa169218d6a5d',
956 'duration': 10990,
957 'upload_date': '20161111',
958 'uploader': 'Team PGP',
959 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
960 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
961 },
962 }],
963 'params': {
964 'skip_download': True,
965 },
cbaed4bb 966 },
f9f49d87 967 {
067aa17e 968 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
969 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
970 'info_dict': {
971 'id': 'gVfLd0zydlo',
972 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
973 },
974 'playlist_count': 2,
be49068d 975 'skip': 'Not multifeed anymore',
f9f49d87 976 },
cbaed4bb 977 {
2d3d2997 978 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 979 'only_matching': True,
0e49d9a6 980 },
6d4fc66b 981 {
2d3d2997 982 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
983 'only_matching': True,
984 },
0e49d9a6 985 {
067aa17e 986 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 987 # Also tests cut-off URL expansion in video description (see
067aa17e
S
988 # https://github.com/ytdl-org/youtube-dl/issues/1892,
989 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
990 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
991 'info_dict': {
992 'id': 'lsguqyKfVQg',
993 'ext': 'mp4',
994 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 995 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 996 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 997 'duration': 133,
0e49d9a6
LL
998 'upload_date': '20151119',
999 'uploader_id': 'IronSoulElf',
ec85ded8 1000 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1001 'uploader': 'IronSoulElf',
eb6793ba
S
1002 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1003 'track': 'Dark Walk - Position Music',
1004 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 1005 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1006 },
1007 'params': {
1008 'skip_download': True,
1009 },
1010 },
61f92af1 1011 {
067aa17e 1012 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1013 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1014 'only_matching': True,
1015 },
313dfc45
LL
1016 {
1017 # Video with yt:stretch=17:0
1018 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1019 'info_dict': {
1020 'id': 'Q39EVAstoRM',
1021 'ext': 'mp4',
1022 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1023 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1024 'upload_date': '20151107',
1025 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1026 'uploader': 'CH GAMER DROID',
1027 },
1028 'params': {
1029 'skip_download': True,
1030 },
be49068d 1031 'skip': 'This video does not exist.',
313dfc45 1032 },
201c1459 1033 {
1034 # Video with incomplete 'yt:stretch=16:'
1035 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1036 'only_matching': True,
1037 },
7caf9830
S
1038 {
1039 # Video licensed under Creative Commons
1040 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1041 'info_dict': {
1042 'id': 'M4gD1WSo5mA',
1043 'ext': 'mp4',
1044 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1045 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1046 'duration': 721,
7caf9830
S
1047 'upload_date': '20150127',
1048 'uploader_id': 'BerkmanCenter',
ec85ded8 1049 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1050 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1051 'license': 'Creative Commons Attribution license (reuse allowed)',
1052 },
1053 'params': {
1054 'skip_download': True,
1055 },
1056 },
fd050249
S
1057 {
1058 # Channel-like uploader_url
1059 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1060 'info_dict': {
1061 'id': 'eQcmzGIKrzg',
1062 'ext': 'mp4',
1063 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1064 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1065 'duration': 4060,
fd050249 1066 'upload_date': '20151119',
eb6793ba 1067 'uploader': 'Bernie Sanders',
fd050249 1068 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1069 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1070 'license': 'Creative Commons Attribution license (reuse allowed)',
1071 },
1072 'params': {
1073 'skip_download': True,
1074 },
1075 },
040ac686
S
1076 {
1077 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1078 'only_matching': True,
7f29cf54
S
1079 },
1080 {
067aa17e 1081 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1082 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1083 'only_matching': True,
6496ccb4
S
1084 },
1085 {
1086 # Rental video preview
1087 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1088 'info_dict': {
1089 'id': 'uGpuVWrhIzE',
1090 'ext': 'mp4',
1091 'title': 'Piku - Trailer',
1092 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1093 'upload_date': '20150811',
1094 'uploader': 'FlixMatrix',
1095 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1096 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1097 'license': 'Standard YouTube License',
1098 },
1099 'params': {
1100 'skip_download': True,
1101 },
eb6793ba 1102 'skip': 'This video is not available.',
022a5d66 1103 },
12afdc2a
S
1104 {
1105 # YouTube Red video with episode data
1106 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1107 'info_dict': {
1108 'id': 'iqKdEhx-dD4',
1109 'ext': 'mp4',
1110 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1111 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1112 'duration': 2085,
12afdc2a
S
1113 'upload_date': '20170118',
1114 'uploader': 'Vsauce',
1115 'uploader_id': 'Vsauce',
1116 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1117 'series': 'Mind Field',
1118 'season_number': 1,
1119 'episode_number': 1,
1120 },
1121 'params': {
1122 'skip_download': True,
1123 },
1124 'expected_warnings': [
1125 'Skipping DASH manifest',
1126 ],
1127 },
c7121fa7
S
1128 {
1129 # The following content has been identified by the YouTube community
1130 # as inappropriate or offensive to some audiences.
1131 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1132 'info_dict': {
1133 'id': '6SJNVb0GnPI',
1134 'ext': 'mp4',
1135 'title': 'Race Differences in Intelligence',
1136 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1137 'duration': 965,
1138 'upload_date': '20140124',
1139 'uploader': 'New Century Foundation',
1140 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1141 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1142 },
1143 'params': {
1144 'skip_download': True,
1145 },
545cc85d 1146 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1147 },
022a5d66
S
1148 {
1149 # itag 212
1150 'url': '1t24XAntNCY',
1151 'only_matching': True,
fd5c4aab
S
1152 },
1153 {
1154 # geo restricted to JP
1155 'url': 'sJL6WA-aGkQ',
1156 'only_matching': True,
1157 },
cd5a74a2
S
1158 {
1159 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1160 'only_matching': True,
1161 },
bc2ca1bb 1162 {
1163 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1164 'only_matching': True,
1165 },
1166 {
1167 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1168 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1169 'only_matching': True,
1170 },
825cd268
RA
1171 {
1172 # DRM protected
1173 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1174 'only_matching': True,
4fe54c12
S
1175 },
1176 {
1177 # Video with unsupported adaptive stream type formats
1178 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1179 'info_dict': {
1180 'id': 'Z4Vy8R84T1U',
1181 'ext': 'mp4',
1182 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1183 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1184 'duration': 433,
1185 'upload_date': '20130923',
1186 'uploader': 'Amelia Putri Harwita',
1187 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1188 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1189 'formats': 'maxcount:10',
1190 },
1191 'params': {
1192 'skip_download': True,
1193 'youtube_include_dash_manifest': False,
1194 },
5429d6a9 1195 'skip': 'not actual anymore',
5caabd3c 1196 },
1197 {
822b9d9c 1198 # Youtube Music Auto-generated description
5caabd3c 1199 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1200 'info_dict': {
1201 'id': 'MgNrAu2pzNs',
1202 'ext': 'mp4',
1203 'title': 'Voyeur Girl',
1204 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1205 'upload_date': '20190312',
5429d6a9
S
1206 'uploader': 'Stephen - Topic',
1207 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1208 'artist': 'Stephen',
1209 'track': 'Voyeur Girl',
1210 'album': 'it\'s too much love to know my dear',
1211 'release_date': '20190313',
1212 'release_year': 2019,
1213 },
1214 'params': {
1215 'skip_download': True,
1216 },
1217 },
66b48727
RA
1218 {
1219 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1220 'only_matching': True,
1221 },
011e75e6
S
1222 {
1223 # invalid -> valid video id redirection
1224 'url': 'DJztXj2GPfl',
1225 'info_dict': {
1226 'id': 'DJztXj2GPfk',
1227 'ext': 'mp4',
1228 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1229 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1230 'upload_date': '20090125',
1231 'uploader': 'Prochorowka',
1232 'uploader_id': 'Prochorowka',
1233 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1234 'artist': 'Panjabi MC',
1235 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1236 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1237 },
1238 'params': {
1239 'skip_download': True,
1240 },
545cc85d 1241 'skip': 'Video unavailable',
ea74e00b
DP
1242 },
1243 {
1244 # empty description results in an empty string
1245 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1246 'info_dict': {
1247 'id': 'x41yOUIvK2k',
1248 'ext': 'mp4',
1249 'title': 'IMG 3456',
1250 'description': '',
1251 'upload_date': '20170613',
1252 'uploader_id': 'ElevageOrVert',
1253 'uploader': 'ElevageOrVert',
1254 },
1255 'params': {
1256 'skip_download': True,
1257 },
1258 },
a0566bbf 1259 {
29f7c58a 1260 # with '};' inside yt initial data (see [1])
1261 # see [2] for an example with '};' inside ytInitialPlayerResponse
1262 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1263 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1264 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1265 'info_dict': {
1266 'id': 'CHqg6qOn4no',
1267 'ext': 'mp4',
1268 'title': 'Part 77 Sort a list of simple types in c#',
1269 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1270 'upload_date': '20130831',
1271 'uploader_id': 'kudvenkat',
1272 'uploader': 'kudvenkat',
1273 },
1274 'params': {
1275 'skip_download': True,
1276 },
1277 },
29f7c58a 1278 {
1279 # another example of '};' in ytInitialData
1280 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1281 'only_matching': True,
1282 },
1283 {
1284 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1285 'only_matching': True,
1286 },
545cc85d 1287 {
cc2db878 1288 # https://github.com/ytdl-org/youtube-dl/pull/28094
1289 'url': 'OtqTfy26tG0',
1290 'info_dict': {
1291 'id': 'OtqTfy26tG0',
1292 'ext': 'mp4',
1293 'title': 'Burn Out',
1294 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1295 'upload_date': '20141120',
1296 'uploader': 'The Cinematic Orchestra - Topic',
1297 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1299 'artist': 'The Cinematic Orchestra',
1300 'track': 'Burn Out',
1301 'album': 'Every Day',
1302 'release_data': None,
1303 'release_year': None,
1304 },
1305 'params': {
1306 'skip_download': True,
1307 },
545cc85d 1308 },
bc2ca1bb 1309 {
1310 # controversial video, only works with bpctr when authenticated with cookies
1311 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1312 'only_matching': True,
1313 },
f7ad7160 1314 {
1315 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1316 'url': 'cBvYw8_A0vQ',
1317 'info_dict': {
1318 'id': 'cBvYw8_A0vQ',
1319 'ext': 'mp4',
1320 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1321 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1322 'upload_date': '20201120',
1323 'uploader': 'Walk around Japan',
1324 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1325 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1326 },
1327 'params': {
1328 'skip_download': True,
1329 },
0fb983f6 1330 }, {
1331 # Has multiple audio streams
1332 'url': 'WaOKSUlf4TM',
1333 'only_matching': True
9297939e 1334 }, {
1335 # Requires Premium: has format 141 when requested using YTM url
1336 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1337 'only_matching': True
1338 }, {
120916da 1339 # multiple subtitles with same lang_code
1340 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1341 'only_matching': True,
1342 },
2eb88d95
PH
1343 ]
1344
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected; every other URL is delegated to the parent class's
    ``suitable``.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1354
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the two empty caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache is filled per player id, _player_cache per
    # (player URL, signature cache id) pair.
    self._code_cache, self._player_cache = {}, {}
e0df6211 1359
60064c53
PH
1360 def _signature_cache_id(self, example_sig):
1361 """ Return a string representation of a signature """
78caa52a 1362 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 1363
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id found in *player_url*.

    The patterns in ``cls._PLAYER_INFO_RE`` are searched in order and the
    ``id`` group of the first one that matches is returned.

    Raises ExtractorError when none of the patterns matches.
    """
    # Lazy generator: patterns after the first hit are never evaluated.
    hits = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_m = next((hit for hit in hits if hit), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c
S
1373
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return the callable that transforms signatures for the given player.

    The downloader's 'youtube-sigfuncs' cache is consulted first. On a miss
    the player code is downloaded (once per player id for this instance),
    parsed with ``_parse_sig_js``, and the index list describing the
    transformation is stored back into that cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cached_indices = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_indices is not None:
        return lambda s: ''.join(s[i] for i in cached_indices)

    if player_id not in self._code_cache:
        self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Feed the function a string whose i-th character has code point i; the
    # code points of the result then give the source index of every output
    # character.
    probe = ''.join(compat_chr(i) for i in range(len(example_sig)))
    index_spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
1400
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function *func*.

    *func* is run over a probe string as long as *example_sig*; the resulting
    index list is turned into an expression of indexes and slices on ``s``
    and written out with ``self.to_screen``.
    """
    def gen_sig_code(idxs):
        # Yields index/slice expressions on ``s`` that reproduce ``idxs``;
        # runs of consecutive indices (step +1 or -1) become one slice.
        def _genslice(start, end, step):
            # Render the run start..end (inclusive) as a slice expression.
            starts = '' if start == 0 else str(start)
            # The stop value is one step past ``end``; a negative stop is
            # left open instead.
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # ``step`` is None while no run is in progress.
        step = None
        # Quell pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                # The run ended at ``prev``: emit it and start over.
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run begins at ``prev``.
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # NOTE(review): when idxs has fewer than two items the loop body never
        # runs and ``i`` is unbound here - confirm callers never pass that.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Probe string: the i-th character has code point i, so the code points
    # of func's output are the source indices.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    # Tuple of the lengths of the dot-separated parts of the example.
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1439
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable wrapping the signature function found in *jscode*.

    The function name is taken from the ``sig`` group of the patterns below,
    handed to ``self._search_regex`` as one tuple. The function is then
    extracted with ``JSInterpreter`` and wrapped so that it takes the
    signature string as its single argument.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # This pattern used to be listed twice in a row; the second copy
        # could never match anything the first had not, so it was dropped.
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list.
    return lambda s: initial_function([s])
1463
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature

    Raises ExtractorError when *player_url* is None, and wraps any failure
    while obtaining or running the signature function in an ExtractorError
    that carries the formatted traceback.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Make the player URL absolute: protocol-relative URLs get https,
    # anything that is not http(s) is resolved against the YouTube origin.
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif re.match(r'https?://', player_url) is None:
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1490
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in *player_response*.

    Does nothing when the response carries no usable
    ``playbackTracking.videostatsPlaybackUrl.baseUrl``. Otherwise ``ver`` and
    a random ``cpn`` are set in the query string and the URL is fetched
    non-fatally.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join(
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    tracking_url = compat_urlparse.urlunparse(url_parts._replace(
        query=compat_urllib_parse_urlencode(query, True)))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1515
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in the HTML of ``webpage``.

    Three sources are scanned, in this order:
      * player embeds (iframe/embed/object tags, ``data-video-url``
        attributes, ``embedSWF``/``new SWFObject(`` calls) pointing at
        youtube(-nocookie).com ``/embed/``, ``/v/`` or ``/p/`` URLs;
      * lazyYT elements (value of ``data-youtube-id``);
      * WordPress "YouTube Video Importer" players (value of
        ``data-video_id``).

    Returns a list of strings.  Entries from the first source are URLs;
    the other two sources contribute whatever the attribute holds
    (presumably bare video IDs).
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to be `embedSWF\(?:\s*`, which
    # demanded a literal ':' and therefore never matched the regular
    # `swfobject.embedSWF("url", ...)` call.  The colon is optional now,
    # so everything that matched before still matches.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:?\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall() returns one tuple per match (q1, q2, id); keep the id only
    entries.extend(m[-1] for m in matches)

    return entries
1547
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``YoutubeIE._extract_urls`` or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1552
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video ID contained in ``url``.

    ``url`` is matched against ``cls._VALID_URL`` (compiled in verbose
    mode) and the second capture group is returned.

    Raises ExtractorError if the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1560
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build the chapter list from the chaptered player bar in ``data``.

    Each chapter starts at its ``timeRangeStartMillis`` (converted to
    seconds) and ends where the following chapter starts; the last one
    ends at ``duration``.  Chapters whose start or end cannot be
    determined are left out.

    Returns a list of ``{'start_time', 'end_time', 'title'}`` dicts, or
    None when ``data`` contains no chapter list.  ``video_id`` is unused.
    """
    chapters_list = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not chapters_list:
        return

    def start_seconds(entry):
        millis = try_get(
            entry,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(chapters_list)
    chapters = []
    for index, chapter in enumerate(chapters_list):
        start_time = start_seconds(chapter)
        if start_time is None:
            continue
        following = index + 1
        if following < total:
            end_time = start_seconds(chapters_list[following])
        else:
            end_time = duration
        if end_time is None:
            continue
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return chapters
1600
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find a JSON blob in ``webpage`` with ``regex`` and parse it.

    The pattern is first tried with ``self._YT_INITIAL_BOUNDARY_RE``
    appended and then on its own.  When nothing matches, ``'{}'`` is
    parsed instead.  Parsing is non-fatal.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex,
    )
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1605
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    The first two space-separated words are handed to datetime_from_str
    as 'now-<X><units>'.  Returns None when the text has fewer than
    three words.
    """
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    amount, unit = words[:2]
    return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
1615
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the ``'text'`` values of the dicts in ``runs``.

    Entries that are not dicts, or whose ``'text'`` is missing, empty or
    not a string, are ignored.  Returns None if no text was collected.
    """
    pieces = []
    for run in runs:
        if not isinstance(run, dict):
            continue
        piece = try_get(run, lambda x: x['text'], compat_str)
        if piece:
            pieces.append(piece)
    return ''.join(pieces) if pieces else None
1629
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no ``commentId``.  Otherwise the
    result holds the id, text, timestamp, raw time text, like count,
    author details and the parent comment id (``'root'`` for top-level
    comments).

    ``timestamp`` is None when the renderer carries no publish-time text
    or when parse_time_text() cannot make sense of it.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)
    # _join_text_entries() gives None when there is no time text, and
    # parse_time_text() gives None for text with fewer than three words;
    # either case used to raise AttributeError here.
    parsed_time = self.parse_time_text(time_text) if time_text else None
    timestamp = (
        calendar.timegm(parsed_time.timetuple())
        if parsed_time is not None else None)
    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                  lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_liked,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
1662
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generator over the comments reachable from ``root_continuation_data``.

    Pages are requested from ``comment_service_ajax`` one continuation at
    a time; each request is retried up to the ``extractor_retries`` param
    (default 3) on HTTP 500/503/404 and on incomplete data.  An HTTP 413
    is taken as the end of the comments.

    Yields comment dicts (see _extract_comment).  For the root call
    (``parent`` is None) it may additionally yield one int: the comment
    count announced in the comments header.  Replies are fetched by
    calling this method recursively with ``parent`` set to the id of the
    comment being replied to.

    ``session_token_list`` is a one-element list so that a refreshed
    ``xsrf_token`` is visible to all recursion levels.  ``comment_counts``
    is the shared ``[comments so far, estimated total, current thread #]``
    list; it is created on the root call.
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # ``dict`` is the expected type here.  It used to sit inside the
            # getter tuple, where it was called as a second getter and
            # could return a copy of the wrong dict.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self.get_param('extractor_retries', 3)
        count = -1
        last_error = None

        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                # Out of retries, or an error that is not worth retrying
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    # Fall back to 0 if the text cannot be converted
                    # (presumably str_to_int() returns None then), so that
                    # '%d' does not fail and callers always receive an int.
                    comment_counts[1] = str_to_int(expected_comment_count) or 0
                    self.to_screen('Downloading ~%d comments' % comment_counts[1])
                    yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index]['continuation']['reloadContinuationData'],
                    dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
        else:
            # None of the returned renderers is known, so ``continuation``
            # was not advanced; stop instead of requesting the very same
            # page again and again.
            break
1847
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
    """Entry for comment extraction

    Walks ``contents`` and, for each entry, feeds the first
    ``itemSectionRenderer`` it contains to _comment_entries().  Ints
    yielded by that generator are taken as the estimated total (only
    used for the final progress message); everything else is collected
    as a comment.

    Returns a dict with ``'comments'`` (the list) and
    ``'comment_count'`` (its length).
    """
    known_entry_comment_renderers = ('itemSectionRenderer',)
    comments = []
    estimated_total = 0
    for entry in contents:
        for key, renderer in entry.items():
            if key in known_entry_comment_renderers:
                identity_token = self._extract_identity_token(webpage, item_id=video_id)
                account_syncid = self._extract_account_syncid(ytcfg)
                for item in self._comment_entries(
                        renderer,
                        identity_token=identity_token,
                        account_syncid=account_syncid,
                        ytcfg=ytcfg,
                        session_token_list=[xsrf_token]):
                    if isinstance(item, int):
                        estimated_total = item
                    else:
                        comments.append(item)
                # Only the first known renderer of an entry is processed
                break
    comment_count = len(comments)
    self.to_screen('Downloaded %d/%d comments' % (comment_count, estimated_total))
    return {
        'comments': comments,
        'comment_count': comment_count,
    }
1878
c5e8d7af 1879 def _real_extract(self, url):
cf7e015f 1880 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1881 video_id = self._match_id(url)
9297939e 1882
1883 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
1884
545cc85d 1885 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1886 webpage_url = base_url + 'watch?v=' + video_id
1887 webpage = self._download_webpage(
cce889b9 1888 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1889
9297939e 1890 def get_text(x):
1891 if not x:
1892 return
1893 text = x.get('simpleText')
1894 if text and isinstance(text, compat_str):
1895 return text
1896 runs = x.get('runs')
1897 if not isinstance(runs, list):
1898 return
1899 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
1900
1901 ytm_streaming_data = {}
1902 if is_music_url:
1903 # we are forcing to use parse_json because 141 only appeared in get_video_info.
1904 # el, c, cver, cplayer field required for 141(aac 256kbps) codec
1905 # maybe paramter of youtube music player?
1906 ytm_player_response = self._parse_json(try_get(compat_parse_qs(
1907 self._download_webpage(
1908 base_url + 'get_video_info', video_id,
fe03a6cd 1909 'Fetching youtube music info webpage',
1910 'unable to download youtube music info webpage', query={
9297939e 1911 'video_id': video_id,
1912 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1913 'el': 'detailpage',
1914 'c': 'WEB_REMIX',
1915 'cver': '0.1',
00ae2769 1916 'cplayer': 'UNIPLAYER',
1917 'html5': '1',
9297939e 1918 }, fatal=False)),
1919 lambda x: x['player_response'][0],
1920 compat_str) or '{}', video_id)
1921 ytm_streaming_data = ytm_player_response.get('streamingData') or {}
1922
545cc85d 1923 player_response = None
1924 if webpage:
1925 player_response = self._extract_yt_initial_variable(
1926 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1927 video_id, 'initial player response')
f4f751af 1928
1929 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1930 if not player_response:
1931 player_response = self._call_api(
f4f751af 1932 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1933
1934 playability_status = player_response.get('playabilityStatus') or {}
1935 if playability_status.get('reason') == 'Sign in to confirm your age':
1936 pr = self._parse_json(try_get(compat_parse_qs(
1937 self._download_webpage(
1938 base_url + 'get_video_info', video_id,
1939 'Refetching age-gated info webpage',
1940 'unable to download video info webpage', query={
1941 'video_id': video_id,
7c60c33e 1942 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
00ae2769 1943 'html5': '1',
545cc85d 1944 }, fatal=False)),
1945 lambda x: x['player_response'][0],
1946 compat_str) or '{}', video_id)
1947 if pr:
1948 player_response = pr
1949
1950 trailer_video_id = try_get(
1951 playability_status,
1952 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1953 compat_str)
1954 if trailer_video_id:
1955 return self.url_result(
1956 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1957
545cc85d 1958 search_meta = (
1959 lambda x: self._html_search_meta(x, webpage, default=None)) \
1960 if webpage else lambda x: None
dbdaaa23 1961
545cc85d 1962 video_details = player_response.get('videoDetails') or {}
37357d21 1963 microformat = try_get(
545cc85d 1964 player_response,
1965 lambda x: x['microformat']['playerMicroformatRenderer'],
1966 dict) or {}
1967 video_title = video_details.get('title') \
1968 or get_text(microformat.get('title')) \
1969 or search_meta(['og:title', 'twitter:title', 'title'])
1970 video_description = video_details.get('shortDescription')
cf7e015f 1971
8fe10494 1972 if not smuggled_data.get('force_singlefeed', False):
a06916d9 1973 if not self.get_param('noplaylist'):
8fe10494
S
1974 multifeed_metadata_list = try_get(
1975 player_response,
1976 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1977 compat_str)
8fe10494
S
1978 if multifeed_metadata_list:
1979 entries = []
1980 feed_ids = []
1981 for feed in multifeed_metadata_list.split(','):
1982 # Unquote should take place before split on comma (,) since textual
1983 # fields may contain comma as well (see
067aa17e 1984 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1985 feed_data = compat_parse_qs(
1986 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1987
1988 def feed_entry(name):
545cc85d 1989 return try_get(
1990 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1991
1992 feed_id = feed_entry('id')
1993 if not feed_id:
1994 continue
1995 feed_title = feed_entry('title')
1996 title = video_title
1997 if feed_title:
1998 title += ' (%s)' % feed_title
8fe10494
S
1999 entries.append({
2000 '_type': 'url_transparent',
2001 'ie_key': 'Youtube',
2002 'url': smuggle_url(
545cc85d 2003 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 2004 {'force_singlefeed': True}),
6b09401b 2005 'title': title,
8fe10494 2006 })
6b09401b 2007 feed_ids.append(feed_id)
8fe10494
S
2008 self.to_screen(
2009 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2010 % (', '.join(feed_ids), video_id))
545cc85d 2011 return self.playlist_result(
2012 entries, video_id, video_title, video_description)
8fe10494
S
2013 else:
2014 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2015
9297939e 2016 formats, itags, stream_ids = [], [], []
cc2db878 2017 itag_qualities = {}
545cc85d 2018 player_url = None
d3fc8074 2019 q = qualities([
2020 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2021 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2022 ])
9297939e 2023
545cc85d 2024 streaming_data = player_response.get('streamingData') or {}
2025 streaming_formats = streaming_data.get('formats') or []
2026 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2027 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2028 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2029
545cc85d 2030 for fmt in streaming_formats:
2031 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2032 continue
321bf820 2033
cc2db878 2034 itag = str_or_none(fmt.get('itag'))
9297939e 2035 audio_track = fmt.get('audioTrack') or {}
2036 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2037 if stream_id in stream_ids:
2038 continue
2039
cc2db878 2040 quality = fmt.get('quality')
d3fc8074 2041 if quality == 'tiny' or not quality:
2042 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2043 if itag and quality:
2044 itag_qualities[itag] = quality
2045 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2046 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2047 # number of fragment that would subsequently requested with (`&sq=N`)
2048 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2049 continue
2050
545cc85d 2051 fmt_url = fmt.get('url')
2052 if not fmt_url:
2053 sc = compat_parse_qs(fmt.get('signatureCipher'))
2054 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2055 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2056 if not (sc and fmt_url and encrypted_sig):
2057 continue
2058 if not player_url:
2059 if not webpage:
2060 continue
2061 player_url = self._search_regex(
2062 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
2063 webpage, 'player URL', fatal=False)
2064 if not player_url:
201e9eaa 2065 continue
545cc85d 2066 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2067 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2068 fmt_url += '&' + sp + '=' + signature
2069
545cc85d 2070 if itag:
2071 itags.append(itag)
9297939e 2072 stream_ids.append(stream_id)
2073
cc2db878 2074 tbr = float_or_none(
2075 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2076 dct = {
2077 'asr': int_or_none(fmt.get('audioSampleRate')),
2078 'filesize': int_or_none(fmt.get('contentLength')),
2079 'format_id': itag,
0fb983f6 2080 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2081 'fps': int_or_none(fmt.get('fps')),
2082 'height': int_or_none(fmt.get('height')),
dca3ff4a 2083 'quality': q(quality),
cc2db878 2084 'tbr': tbr,
545cc85d 2085 'url': fmt_url,
2086 'width': fmt.get('width'),
0fb983f6 2087 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2088 }
2089 mimetype = fmt.get('mimeType')
2090 if mimetype:
2091 mobj = re.match(
2092 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2093 if mobj:
2094 dct['ext'] = mimetype2ext(mobj.group(1))
2095 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2096 no_audio = dct.get('acodec') == 'none'
2097 no_video = dct.get('vcodec') == 'none'
2098 if no_audio:
2099 dct['vbr'] = tbr
2100 if no_video:
2101 dct['abr'] = tbr
2102 if no_audio or no_video:
545cc85d 2103 dct['downloader_options'] = {
2104 # Youtube throttles chunks >~10M
2105 'http_chunk_size': 10485760,
bf1317d2 2106 }
7c60c33e 2107 if dct.get('ext'):
2108 dct['container'] = dct['ext'] + '_dash'
545cc85d 2109 formats.append(dct)
2110
9297939e 2111 for sd in (streaming_data, ytm_streaming_data):
2112 hls_manifest_url = sd.get('hlsManifestUrl')
2113 if hls_manifest_url:
2114 for f in self._extract_m3u8_formats(
2115 hls_manifest_url, video_id, 'mp4', fatal=False):
2116 itag = self._search_regex(
2117 r'/itag/(\d+)', f['url'], 'itag', default=None)
2118 if itag:
2119 f['format_id'] = itag
8d68ab98 2120 formats.append(f)
545cc85d 2121
a06916d9 2122 if self.get_param('youtube_include_dash_manifest', True):
9297939e 2123 for sd in (streaming_data, ytm_streaming_data):
2124 dash_manifest_url = sd.get('dashManifestUrl')
2125 if dash_manifest_url:
2126 for f in self._extract_mpd_formats(
2127 dash_manifest_url, video_id, fatal=False):
2128 itag = f['format_id']
2129 if itag in itags:
2130 continue
2131 if itag in itag_qualities:
9297939e 2132 f['quality'] = q(itag_qualities[itag])
2133 filesize = int_or_none(self._search_regex(
2134 r'/clen/(\d+)', f.get('fragment_base_url')
2135 or f['url'], 'file size', default=None))
2136 if filesize:
2137 f['filesize'] = filesize
2138 formats.append(f)
bf1317d2 2139
545cc85d 2140 if not formats:
a06916d9 2141 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2142 self.raise_no_formats(
545cc85d 2143 'This video is DRM protected.', expected=True)
2144 pemr = try_get(
2145 playability_status,
2146 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2147 dict) or {}
2148 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2149 subreason = pemr.get('subreason')
2150 if subreason:
2151 subreason = clean_html(get_text(subreason))
2152 if subreason == 'The uploader has not made this video available in your country.':
2153 countries = microformat.get('availableCountries')
2154 if not countries:
2155 regions_allowed = search_meta('regionsAllowed')
2156 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2157 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2158 reason += '\n' + subreason
2159 if reason:
b7da73eb 2160 self.raise_no_formats(reason, expected=True)
bf1317d2 2161
545cc85d 2162 self._sort_formats(formats)
bf1317d2 2163
545cc85d 2164 keywords = video_details.get('keywords') or []
2165 if not keywords and webpage:
2166 keywords = [
2167 unescapeHTML(m.group('content'))
2168 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2169 for keyword in keywords:
2170 if keyword.startswith('yt:stretch='):
201c1459 2171 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2172 if mobj:
2173 # NB: float is intentional for forcing float division
2174 w, h = (float(v) for v in mobj.groups())
2175 if w > 0 and h > 0:
2176 ratio = w / h
2177 for f in formats:
2178 if f.get('vcodec') != 'none':
2179 f['stretched_ratio'] = ratio
2180 break
6449cd80 2181
545cc85d 2182 thumbnails = []
2183 for container in (video_details, microformat):
2184 for thumbnail in (try_get(
2185 container,
2186 lambda x: x['thumbnail']['thumbnails'], list) or []):
2187 thumbnail_url = thumbnail.get('url')
2188 if not thumbnail_url:
bf1317d2 2189 continue
1988fab7 2190 # Sometimes youtube gives a wrong thumbnail URL. See:
2191 # https://github.com/yt-dlp/yt-dlp/issues/233
2192 # https://github.com/ytdl-org/youtube-dl/issues/28023
2193 if 'maxresdefault' in thumbnail_url:
2194 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2195 thumbnails.append({
545cc85d 2196 'url': thumbnail_url,
ff2751ac 2197 'height': int_or_none(thumbnail.get('height')),
545cc85d 2198 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2199 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2200 })
ff2751ac 2201 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2202 if thumbnail_url:
2203 thumbnails.append({
2204 'url': thumbnail_url,
2205 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2206 })
2207 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2208 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2209 thumbnails.append({
2210 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2211 'preference': 1,
2212 })
2213 self._remove_duplicate_formats(thumbnails)
545cc85d 2214
2215 category = microformat.get('category') or search_meta('genre')
2216 channel_id = video_details.get('channelId') \
2217 or microformat.get('externalChannelId') \
2218 or search_meta('channelId')
2219 duration = int_or_none(
2220 video_details.get('lengthSeconds')
2221 or microformat.get('lengthSeconds')) \
2222 or parse_duration(search_meta('duration'))
2223 is_live = video_details.get('isLive')
2224 owner_profile_url = microformat.get('ownerProfileUrl')
2225
2226 info = {
2227 'id': video_id,
2228 'title': self._live_title(video_title) if is_live else video_title,
2229 'formats': formats,
2230 'thumbnails': thumbnails,
2231 'description': video_description,
2232 'upload_date': unified_strdate(
2233 microformat.get('uploadDate')
2234 or search_meta('uploadDate')),
2235 'uploader': video_details['author'],
2236 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2237 'uploader_url': owner_profile_url,
2238 'channel_id': channel_id,
2239 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2240 'duration': duration,
2241 'view_count': int_or_none(
2242 video_details.get('viewCount')
2243 or microformat.get('viewCount')
2244 or search_meta('interactionCount')),
2245 'average_rating': float_or_none(video_details.get('averageRating')),
2246 'age_limit': 18 if (
2247 microformat.get('isFamilySafe') is False
2248 or search_meta('isFamilyFriendly') == 'false'
2249 or search_meta('og:restrictions:age') == '18+') else 0,
2250 'webpage_url': webpage_url,
2251 'categories': [category] if category else None,
2252 'tags': keywords,
2253 'is_live': is_live,
2254 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2255 'was_live': video_details.get('isLiveContent'),
545cc85d 2256 }
b477fc13 2257
545cc85d 2258 pctr = try_get(
2259 player_response,
2260 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2261 subtitles = {}
2262 if pctr:
774d79cc 2263 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2264 lang_subs = container.setdefault(lang_code, [])
545cc85d 2265 for fmt in self._SUBTITLE_FORMATS:
2266 query.update({
2267 'fmt': fmt,
2268 })
2269 lang_subs.append({
2270 'ext': fmt,
2271 'url': update_url_query(base_url, query),
774d79cc 2272 'name': sub_name,
545cc85d 2273 })
7e72694b 2274
545cc85d 2275 for caption_track in (pctr.get('captionTracks') or []):
2276 base_url = caption_track.get('baseUrl')
2277 if not base_url:
2278 continue
2279 if caption_track.get('kind') != 'asr':
120916da 2280 lang_code = (
2281 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2282 or caption_track.get('languageCode'))
545cc85d 2283 if not lang_code:
2284 continue
2285 process_language(
774d79cc 2286 subtitles, base_url, lang_code,
2287 try_get(caption_track, lambda x: x.get('name').get('simpleText')),
2288 {})
545cc85d 2289 continue
2290 automatic_captions = {}
2291 for translation_language in (pctr.get('translationLanguages') or []):
2292 translation_language_code = translation_language.get('languageCode')
2293 if not translation_language_code:
2294 continue
2295 process_language(
2296 automatic_captions, base_url, translation_language_code,
774d79cc 2297 try_get(translation_language, lambda x: x['languageName']['simpleText']),
545cc85d 2298 {'tlang': translation_language_code})
2299 info['automatic_captions'] = automatic_captions
2300 info['subtitles'] = subtitles
7e72694b 2301
545cc85d 2302 parsed_url = compat_urllib_parse_urlparse(url)
2303 for component in [parsed_url.fragment, parsed_url.query]:
2304 query = compat_parse_qs(component)
2305 for k, v in query.items():
2306 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2307 d_k += '_time'
2308 if d_k not in info and k in s_ks:
2309 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2310
2311 # Youtube Music Auto-generated description
822b9d9c 2312 if video_description:
38d70284 2313 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2314 if mobj:
822b9d9c
RA
2315 release_year = mobj.group('release_year')
2316 release_date = mobj.group('release_date')
2317 if release_date:
2318 release_date = release_date.replace('-', '')
2319 if not release_year:
545cc85d 2320 release_year = release_date[:4]
2321 info.update({
2322 'album': mobj.group('album'.strip()),
2323 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2324 'track': mobj.group('track').strip(),
2325 'release_date': release_date,
cc2db878 2326 'release_year': int_or_none(release_year),
545cc85d 2327 })
7e72694b 2328
545cc85d 2329 initial_data = None
2330 if webpage:
2331 initial_data = self._extract_yt_initial_variable(
2332 webpage, self._YT_INITIAL_DATA_RE, video_id,
2333 'yt initial data')
2334 if not initial_data:
2335 initial_data = self._call_api(
f4f751af 2336 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2337
2338 if not is_live:
2339 try:
2340 # This will error if there is no livechat
2341 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2342 info['subtitles']['live_chat'] = [{
394dcd44 2343 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2344 'video_id': video_id,
2345 'ext': 'json',
2346 'protocol': 'youtube_live_chat_replay',
2347 }]
2348 except (KeyError, IndexError, TypeError):
2349 pass
2350
2351 if initial_data:
2352 chapters = self._extract_chapters_from_json(
2353 initial_data, video_id, duration)
2354 if not chapters:
2355 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2356 contents = try_get(
2357 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2358 list)
2359 if not contents:
2360 continue
2361
2362 def chapter_time(mmlir):
2363 return parse_duration(
2364 get_text(mmlir.get('timeDescription')))
2365
2366 chapters = []
2367 for next_num, content in enumerate(contents, start=1):
2368 mmlir = content.get('macroMarkersListItemRenderer') or {}
2369 start_time = chapter_time(mmlir)
2370 end_time = chapter_time(try_get(
2371 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2372 if next_num < len(contents) else duration
2373 if start_time is None or end_time is None:
2374 continue
2375 chapters.append({
2376 'start_time': start_time,
2377 'end_time': end_time,
2378 'title': get_text(mmlir.get('title')),
2379 })
2380 if chapters:
2381 break
2382 if chapters:
2383 info['chapters'] = chapters
2384
2385 contents = try_get(
2386 initial_data,
2387 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2388 list) or []
2389 for content in contents:
2390 vpir = content.get('videoPrimaryInfoRenderer')
2391 if vpir:
2392 stl = vpir.get('superTitleLink')
2393 if stl:
2394 stl = get_text(stl)
2395 if try_get(
2396 vpir,
2397 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2398 info['location'] = stl
2399 else:
2400 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2401 if mobj:
2402 info.update({
2403 'series': mobj.group(1),
2404 'season_number': int(mobj.group(2)),
2405 'episode_number': int(mobj.group(3)),
2406 })
2407 for tlb in (try_get(
2408 vpir,
2409 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2410 list) or []):
2411 tbr = tlb.get('toggleButtonRenderer') or {}
2412 for getter, regex in [(
2413 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2414 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2415 lambda x: x['accessibility'],
2416 lambda x: x['accessibilityData']['accessibilityData'],
2417 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2418 label = (try_get(tbr, getter, dict) or {}).get('label')
2419 if label:
2420 mobj = re.match(regex, label)
2421 if mobj:
2422 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2423 break
2424 sbr_tooltip = try_get(
2425 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2426 if sbr_tooltip:
2427 like_count, dislike_count = sbr_tooltip.split(' / ')
2428 info.update({
2429 'like_count': str_to_int(like_count),
2430 'dislike_count': str_to_int(dislike_count),
2431 })
2432 vsir = content.get('videoSecondaryInfoRenderer')
2433 if vsir:
2434 info['channel'] = get_text(try_get(
2435 vsir,
2436 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2437 dict))
545cc85d 2438 rows = try_get(
2439 vsir,
2440 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2441 list) or []
2442 multiple_songs = False
2443 for row in rows:
2444 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2445 multiple_songs = True
2446 break
2447 for row in rows:
2448 mrr = row.get('metadataRowRenderer') or {}
2449 mrr_title = mrr.get('title')
2450 if not mrr_title:
2451 continue
2452 mrr_title = get_text(mrr['title'])
2453 mrr_contents_text = get_text(mrr['contents'][0])
2454 if mrr_title == 'License':
2455 info['license'] = mrr_contents_text
2456 elif not multiple_songs:
2457 if mrr_title == 'Album':
2458 info['album'] = mrr_contents_text
2459 elif mrr_title == 'Artist':
2460 info['artist'] = mrr_contents_text
2461 elif mrr_title == 'Song':
2462 info['track'] = mrr_contents_text
2463
2464 fallbacks = {
2465 'channel': 'uploader',
2466 'channel_id': 'uploader_id',
2467 'channel_url': 'uploader_url',
2468 }
2469 for to, frm in fallbacks.items():
2470 if not info.get(to):
2471 info[to] = info.get(frm)
2472
2473 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2474 v = info.get(s_k)
2475 if v:
2476 info[d_k] = v
b84071c0 2477
c224251a
M
2478 is_private = bool_or_none(video_details.get('isPrivate'))
2479 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2480 is_membersonly = None
b28f8d24 2481 is_premium = None
c224251a
M
2482 if initial_data and is_private is not None:
2483 is_membersonly = False
b28f8d24 2484 is_premium = False
c224251a
M
2485 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2486 for content in contents or []:
2487 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2488 for badge in badges or []:
2489 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2490 if label.lower() == 'members only':
2491 is_membersonly = True
2492 break
b28f8d24
M
2493 elif label.lower() == 'premium':
2494 is_premium = True
2495 break
2496 if is_membersonly or is_premium:
c224251a
M
2497 break
2498
2499 # TODO: Add this for playlists
2500 info['availability'] = self._availability(
2501 is_private=is_private,
b28f8d24 2502 needs_premium=is_premium,
c224251a
M
2503 needs_subscription=is_membersonly,
2504 needs_auth=info['age_limit'] >= 18,
2505 is_unlisted=None if is_private is None else is_unlisted)
2506
06167fbb 2507 # get xsrf for annotations or comments
a06916d9 2508 get_annotations = self.get_param('writeannotations', False)
2509 get_comments = self.get_param('getcomments', False)
06167fbb 2510 if get_annotations or get_comments:
29f7c58a 2511 xsrf_token = None
545cc85d 2512 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2513 if ytcfg:
2514 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2515 if not xsrf_token:
2516 xsrf_token = self._search_regex(
2517 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2518 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2519
2520 # annotations
06167fbb 2521 if get_annotations:
64b6a4e9
RA
2522 invideo_url = try_get(
2523 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2524 if xsrf_token and invideo_url:
29f7c58a 2525 xsrf_field_name = None
2526 if ytcfg:
2527 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2528 if not xsrf_field_name:
2529 xsrf_field_name = self._search_regex(
2530 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2531 webpage, 'xsrf field name',
29f7c58a 2532 group='xsrf_field_name', default='session_token')
8a784c74 2533 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2534 self._proto_relative_url(invideo_url),
2535 video_id, note='Downloading annotations',
2536 errnote='Unable to download video annotations', fatal=False,
2537 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2538
277d6ff5 2539 if get_comments:
a1c5d2ca 2540 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2541
545cc85d 2542 self.mark_watched(video_id, player_response)
d77ab8e2 2543
545cc85d 2544 return info
c5e8d7af 2545
5f6a1245 2546
8bdd16b4 2547class YoutubeTabIE(YoutubeBaseInfoExtractor):
2548 IE_DESC = 'YouTube.com tab'
70d5c17b 2549 _VALID_URL = r'''(?x)
2550 https?://
2551 (?:\w+\.)?
2552 (?:
2553 youtube(?:kids)?\.com|
2554 invidio\.us
2555 )/
2556 (?:
fe03a6cd 2557 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 2558 (?P<not_channel>
9ba5705a 2559 feed/|hashtag/|
70d5c17b 2560 (?:playlist|watch)\?.*?\blist=
2561 )|
29f7c58a 2562 (?!(?:%s)\b) # Direct URLs
70d5c17b 2563 )
2564 (?P<id>[^/?\#&]+)
2565 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2566 IE_NAME = 'youtube:tab'
2567
81127aa5 2568 _TESTS = [{
da692b79 2569 'note': 'playlists, multipage',
8bdd16b4 2570 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2571 'playlist_mincount': 94,
2572 'info_dict': {
2573 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2574 'title': 'Игорь Клейнер - Playlists',
2575 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2576 'uploader': 'Игорь Клейнер',
2577 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2578 },
2579 }, {
da692b79 2580 'note': 'playlists, multipage, different order',
8bdd16b4 2581 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2582 'playlist_mincount': 94,
2583 'info_dict': {
2584 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2585 'title': 'Игорь Клейнер - Playlists',
2586 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2587 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2588 'uploader': 'Игорь Клейнер',
8bdd16b4 2589 },
201c1459 2590 }, {
da692b79 2591 'note': 'playlists, series',
201c1459 2592 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2593 'playlist_mincount': 5,
2594 'info_dict': {
2595 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2596 'title': '3Blue1Brown - Playlists',
2597 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 2598 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2599 'uploader': '3Blue1Brown',
201c1459 2600 },
8bdd16b4 2601 }, {
da692b79 2602 'note': 'playlists, singlepage',
8bdd16b4 2603 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2604 'playlist_mincount': 4,
2605 'info_dict': {
2606 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2607 'title': 'ThirstForScience - Playlists',
2608 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2609 'uploader': 'ThirstForScience',
2610 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2611 }
2612 }, {
2613 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2614 'only_matching': True,
2615 }, {
da692b79 2616 'note': 'basic, single video playlist',
0e30a7b9 2617 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2618 'info_dict': {
0e30a7b9 2619 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2620 'uploader': 'Sergey M.',
2621 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2622 'title': 'youtube-dl public playlist',
81127aa5 2623 },
0e30a7b9 2624 'playlist_count': 1,
9291475f 2625 }, {
da692b79 2626 'note': 'empty playlist',
0e30a7b9 2627 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2628 'info_dict': {
0e30a7b9 2629 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2630 'uploader': 'Sergey M.',
2631 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2632 'title': 'youtube-dl empty playlist',
9291475f
PH
2633 },
2634 'playlist_count': 0,
2635 }, {
da692b79 2636 'note': 'Home tab',
8bdd16b4 2637 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2638 'info_dict': {
8bdd16b4 2639 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2640 'title': 'lex will - Home',
2641 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2642 'uploader': 'lex will',
2643 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2644 },
8bdd16b4 2645 'playlist_mincount': 2,
9291475f 2646 }, {
da692b79 2647 'note': 'Videos tab',
8bdd16b4 2648 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2649 'info_dict': {
8bdd16b4 2650 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2651 'title': 'lex will - Videos',
2652 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2653 'uploader': 'lex will',
2654 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2655 },
8bdd16b4 2656 'playlist_mincount': 975,
9291475f 2657 }, {
da692b79 2658 'note': 'Videos tab, sorted by popular',
8bdd16b4 2659 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2660 'info_dict': {
8bdd16b4 2661 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2662 'title': 'lex will - Videos',
2663 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2664 'uploader': 'lex will',
2665 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2666 },
8bdd16b4 2667 'playlist_mincount': 199,
9291475f 2668 }, {
da692b79 2669 'note': 'Playlists tab',
8bdd16b4 2670 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2671 'info_dict': {
8bdd16b4 2672 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2673 'title': 'lex will - Playlists',
2674 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2675 'uploader': 'lex will',
2676 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2677 },
8bdd16b4 2678 'playlist_mincount': 17,
ac7553d0 2679 }, {
da692b79 2680 'note': 'Community tab',
8bdd16b4 2681 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2682 'info_dict': {
8bdd16b4 2683 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2684 'title': 'lex will - Community',
2685 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2686 'uploader': 'lex will',
2687 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2688 },
2689 'playlist_mincount': 18,
87dadd45 2690 }, {
da692b79 2691 'note': 'Channels tab',
8bdd16b4 2692 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2693 'info_dict': {
8bdd16b4 2694 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2695 'title': 'lex will - Channels',
2696 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2697 'uploader': 'lex will',
2698 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2699 },
deaec5af 2700 'playlist_mincount': 12,
cd684175 2701 }, {
2702 'note': 'Search tab',
2703 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
2704 'playlist_mincount': 40,
2705 'info_dict': {
2706 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2707 'title': '3Blue1Brown - Search - linear algebra',
2708 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2709 'uploader': '3Blue1Brown',
2710 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2711 },
6b08cdf6 2712 }, {
a0566bbf 2713 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2714 'only_matching': True,
2715 }, {
a0566bbf 2716 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2717 'only_matching': True,
2718 }, {
a0566bbf 2719 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2720 'only_matching': True,
2721 }, {
2722 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2723 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2724 'info_dict': {
2725 'title': '29C3: Not my department',
2726 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2727 'uploader': 'Christiaan008',
2728 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2729 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2730 },
2731 'playlist_count': 96,
2732 }, {
2733 'note': 'Large playlist',
2734 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2735 'info_dict': {
8bdd16b4 2736 'title': 'Uploads from Cauchemar',
2737 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2738 'uploader': 'Cauchemar',
2739 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2740 },
8bdd16b4 2741 'playlist_mincount': 1123,
2742 }, {
da692b79 2743 'note': 'even larger playlist, 8832 videos',
8bdd16b4 2744 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2745 'only_matching': True,
4b7df0d3
JMF
2746 }, {
2747 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2748 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2749 'info_dict': {
acf757f4
PH
2750 'title': 'Uploads from Interstellar Movie',
2751 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2752 'uploader': 'Interstellar Movie',
8bdd16b4 2753 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2754 },
481cc733 2755 'playlist_mincount': 21,
358de58c 2756 }, {
2757 'note': 'Playlist with "show unavailable videos" button',
2758 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2759 'info_dict': {
2760 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2761 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2762 'uploader': 'Phim Siêu Nhân Nhật Bản',
2763 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2764 },
da692b79 2765 'playlist_mincount': 200,
5d342002 2766 }, {
da692b79 2767 'note': 'Playlist with unavailable videos in page 7',
5d342002 2768 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2769 'info_dict': {
2770 'title': 'Uploads from BlankTV',
2771 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2772 'uploader': 'BlankTV',
2773 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2774 },
da692b79 2775 'playlist_mincount': 1000,
8bdd16b4 2776 }, {
da692b79 2777 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 2778 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2779 'info_dict': {
2780 'title': 'Data Analysis with Dr Mike Pound',
2781 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2782 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2783 'uploader': 'Computerphile',
deaec5af 2784 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2785 },
2786 'playlist_mincount': 11,
2787 }, {
a0566bbf 2788 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2789 'only_matching': True,
dacb3a86 2790 }, {
da692b79 2791 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
2792 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2793 'info_dict': {
2794 'id': 'FqZTN594JQw',
2795 'ext': 'webm',
2796 'title': "Smiley's People 01 detective, Adventure Series, Action",
2797 'uploader': 'STREEM',
2798 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2799 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2800 'upload_date': '20150526',
2801 'license': 'Standard YouTube License',
2802 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2803 'categories': ['People & Blogs'],
2804 'tags': list,
dbdaaa23 2805 'view_count': int,
dacb3a86
S
2806 'like_count': int,
2807 'dislike_count': int,
2808 },
2809 'params': {
2810 'skip_download': True,
2811 },
13a75688 2812 'skip': 'This video is not available.',
dacb3a86 2813 'add_ie': [YoutubeIE.ie_key()],
481cc733 2814 }, {
8bdd16b4 2815 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2816 'only_matching': True,
66b48727 2817 }, {
8bdd16b4 2818 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2819 'only_matching': True,
a0566bbf 2820 }, {
2821 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2822 'info_dict': {
da692b79 2823 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 2824 'ext': 'mp4',
deaec5af 2825 'title': compat_str,
a0566bbf 2826 'uploader': 'Sky News',
2827 'uploader_id': 'skynews',
2828 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 2829 'upload_date': r're:\d{8}',
2830 'description': compat_str,
a0566bbf 2831 'categories': ['News & Politics'],
2832 'tags': list,
2833 'like_count': int,
2834 'dislike_count': int,
2835 },
2836 'params': {
2837 'skip_download': True,
2838 },
da692b79 2839 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 2840 }, {
2841 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2842 'info_dict': {
2843 'id': 'a48o2S1cPoo',
2844 'ext': 'mp4',
2845 'title': 'The Young Turks - Live Main Show',
2846 'uploader': 'The Young Turks',
2847 'uploader_id': 'TheYoungTurks',
2848 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2849 'upload_date': '20150715',
2850 'license': 'Standard YouTube License',
2851 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2852 'categories': ['News & Politics'],
2853 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2854 'like_count': int,
2855 'dislike_count': int,
2856 },
2857 'params': {
2858 'skip_download': True,
2859 },
2860 'only_matching': True,
2861 }, {
2862 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2863 'only_matching': True,
2864 }, {
2865 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2866 'only_matching': True,
09f1580e 2867 }, {
2868 'note': 'A channel that is not live. Should raise error',
2869 'url': 'https://www.youtube.com/user/numberphile/live',
2870 'only_matching': True,
3d3dddc9 2871 }, {
2872 'url': 'https://www.youtube.com/feed/trending',
2873 'only_matching': True,
2874 }, {
3d3dddc9 2875 'url': 'https://www.youtube.com/feed/library',
2876 'only_matching': True,
2877 }, {
3d3dddc9 2878 'url': 'https://www.youtube.com/feed/history',
2879 'only_matching': True,
2880 }, {
3d3dddc9 2881 'url': 'https://www.youtube.com/feed/subscriptions',
2882 'only_matching': True,
2883 }, {
3d3dddc9 2884 'url': 'https://www.youtube.com/feed/watch_later',
2885 'only_matching': True,
2886 }, {
da692b79 2887 'note': 'Recommended - redirects to home page',
3d3dddc9 2888 'url': 'https://www.youtube.com/feed/recommended',
2889 'only_matching': True,
29f7c58a 2890 }, {
da692b79 2891 'note': 'inline playlist with not always working continuations',
29f7c58a 2892 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2893 'only_matching': True,
2894 }, {
2895 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2896 'only_matching': True,
2897 }, {
2898 'url': 'https://www.youtube.com/course',
2899 'only_matching': True,
2900 }, {
2901 'url': 'https://www.youtube.com/zsecurity',
2902 'only_matching': True,
2903 }, {
2904 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2905 'only_matching': True,
2906 }, {
2907 'url': 'https://www.youtube.com/TheYoungTurks/live',
2908 'only_matching': True,
39ed931e 2909 }, {
2910 'url': 'https://www.youtube.com/hashtag/cctv9',
2911 'info_dict': {
2912 'id': 'cctv9',
2913 'title': '#cctv9',
2914 },
2915 'playlist_mincount': 350,
201c1459 2916 }, {
2917 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2918 'only_matching': True,
9297939e 2919 }, {
da692b79 2920 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 2921 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2922 'only_matching': True
fe03a6cd 2923 }, {
2924 'note': '/browse/ should redirect to /channel/',
2925 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
2926 'only_matching': True
2927 }, {
2928 'note': 'VLPL, should redirect to playlist?list=PL...',
2929 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2930 'info_dict': {
2931 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2932 'uploader': 'NoCopyrightSounds',
2933 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
2934 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
2935 'title': 'NCS Releases',
2936 },
2937 'playlist_mincount': 166,
18db7548 2938 }, {
2939 'note': 'Topic, should redirect to playlist?list=UU...',
2940 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
2941 'info_dict': {
2942 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
2943 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
2944 'title': 'Uploads from Royalty Free Music - Topic',
2945 'uploader': 'Royalty Free Music - Topic',
2946 },
2947 'expected_warnings': [
2948 'A channel/user page was given',
2949 'The URL does not have a videos tab',
2950 ],
2951 'playlist_mincount': 101,
2952 }, {
2953 'note': 'Topic without a UU playlist',
2954 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
2955 'info_dict': {
2956 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
2957 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
2958 },
2959 'expected_warnings': [
2960 'A channel/user page was given',
2961 'The URL does not have a videos tab',
2962 'Falling back to channel URL',
2963 ],
2964 'playlist_mincount': 9,
abcdd12b 2965 }, {
2966 'note': 'Youtube music Album',
2967 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
2968 'info_dict': {
2969 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
2970 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
2971 },
2972 'playlist_count': 50,
29f7c58a 2973 }]
2974
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    Any URL accepted by YoutubeIE is rejected here; for everything else
    the decision is delegated to the parent class implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2979
def _extract_channel_id(self, webpage):
    """Return the channel id found in *webpage*.

    The 'channelId' meta tag is preferred. When it yields nothing, a
    channel URL is looked up in a list of URL meta tags and the id is
    taken from its ``/channel/<id>`` path component.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to sit inside the group: with `([^/?#&])+` the
    # group is overwritten on every repetition and ends up holding only
    # the last character of the id.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 2992
@staticmethod
def _extract_basic_item_renderer(item):
    """Return the first usable renderer dict stored in *item*, or None.

    A value qualifies when it is a dict and its key is either one of the
    known basic renderer names or has the form ``grid...Renderer``.
    Modified from _extract_grid_item_renderer.
    """
    basic_renderer_keys = (
        'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
    )
    for name, candidate in item.items():
        if not isinstance(candidate, dict):
            continue
        if name in basic_renderer_keys:
            return candidate
        if name.startswith('grid') and name.endswith('Renderer'):
            return candidate
    return None
8bdd16b4 3006
def _grid_entries(self, grid_renderer):
    """Yield one entry per supported item in ``grid_renderer['items']``.

    Each item is classified by the first id its renderer carries, checked
    in this order: playlist, video, channel. Items without any of these
    fall back to their navigation endpoint URL, which is handed to the
    first extractor that declares itself suitable for it.
    """
    title_getters = (
        lambda x: x['title']['runs'][0]['text'],
        lambda x: x['title']['simpleText'],
    )
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue
        item_title = try_get(item_renderer, title_getters, compat_str)

        # playlist
        list_id = item_renderer.get('playlistId')
        if list_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % list_id,
                ie=YoutubeTabIE.ie_key(), video_id=list_id,
                video_title=item_title)
            continue

        # video
        if item_renderer.get('videoId'):
            yield self._extract_video(item_renderer)
            continue

        # channel
        owner_id = item_renderer.get('channelId')
        if owner_id:
            # For channels only the simpleText form of the title is used
            channel_title = try_get(
                item_renderer, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % owner_id,
                ie=YoutubeTabIE.ie_key(), video_title=channel_title)
            continue

        # generic endpoint URL support
        endpoint_path = try_get(
            item_renderer,
            lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str)
        endpoint_url = urljoin('https://www.youtube.com/', endpoint_path)
        if not endpoint_url:
            continue
        for ie_cls in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
            if not ie_cls.suitable(endpoint_url):
                continue
            yield self.url_result(
                endpoint_url, ie=ie_cls.ie_key(),
                video_id=ie_cls._match_id(endpoint_url), video_title=item_title)
            break
8bdd16b4 3049
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the entries found in the 'content' of *shelf_renderer*.

    Only 'gridRenderer' and 'expandedShelfContentsRenderer' contents are
    processed; they are passed on to _grid_entries. Nothing is yielded
    when 'content' is not a dict.
    """
    shelf_content = shelf_renderer.get('content')
    if not isinstance(shelf_content, dict):
        return
    grid = (
        shelf_content.get('gridRenderer')
        or shelf_content.get('expandedShelfContentsRenderer'))
    if grid:
        # TODO: add support for nested playlists so each shelf is processed
        # as separate playlist
        # TODO: this includes only first N items
        for shelf_entry in self._grid_entries(grid):
            yield shelf_entry
    # TODO: 'horizontalListRenderer' content is not handled yet
8bdd16b4 3065
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf: its own URL first, then its content.

    When the shelf has an endpoint URL, a url_result for it is yielded
    with the shelf title. If *skip_channels* is true and that URL
    contains '/channels?', nothing at all is yielded for the shelf.
    Entries extracted from the shelf content always follow.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are skipped on request. Note that
        # checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = skip_channels and '/channels?' in shelf_url
        if links_to_channels:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 3083
def _playlist_entries(self, video_list_renderer):
    """Yield an extracted video for each usable item in 'contents'.

    An item is usable when it is a dict holding a 'playlistVideoRenderer'
    or 'playlistPanelVideoRenderer' dict with a non-empty 'videoId'.
    """
    for list_item in video_list_renderer['contents']:
        if not isinstance(list_item, dict):
            continue
        video_renderer = (
            list_item.get('playlistVideoRenderer')
            or list_item.get('playlistPanelVideoRenderer'))
        if not isinstance(video_renderer, dict):
            continue
        if video_renderer.get('videoId'):
            yield self._extract_video(video_renderer)
07aeced6 3095
def _rich_entries(self, rich_grid_renderer):
    """Yield the extracted video of a rich grid item, if it has one.

    The video renderer is read from ['content']['videoRenderer'];
    nothing is yielded when it is missing or has no 'videoId'.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3103
def _video_entry(self, video_renderer):
    """Return the extracted video for *video_renderer*, or None when it has no 'videoId'."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)
dacb3a86 3108
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a single post.

    In order: the attached video (if any), the attached playlist (if
    any), and then every link in the post text that YoutubeIE can handle,
    except a link to the video that is already attached.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post,
        lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'],
        compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    text_runs = try_get(post, lambda x: x['contentText']['runs'], list) or []
    for text_run in text_runs:
        if not isinstance(text_run, dict):
            continue
        link_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'],
            compat_str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        linked_video_id = YoutubeIE._match_id(link_url)
        # The attached video has already been handled above
        if attached_video_id == linked_video_id:
            continue
        yield self.url_result(
            link_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 3144
8bdd16b4 3145 def _post_thread_continuation_entries(self, post_thread_continuation):
3146 contents = post_thread_continuation.get('contents')
3147 if not isinstance(contents, list):
3148 return
3149 for content in contents:
3150 renderer = content.get('backstagePostThreadRenderer')
3151 if not isinstance(renderer, dict):
3152 continue
3153 for entry in self._post_thread_entries(renderer):
3154 yield entry
07aeced6 3155
39ed931e 3156 r''' # unused
3157 def _rich_grid_entries(self, contents):
3158 for content in contents:
3159 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3160 if video_renderer:
3161 entry = self._video_entry(video_renderer)
3162 if entry:
3163 yield entry
3164 '''
3165
29f7c58a 3166 @staticmethod
3167 def _build_continuation_query(continuation, ctp=None):
3168 query = {
3169 'ctoken': continuation,
3170 'continuation': continuation,
3171 }
3172 if ctp:
3173 query['itct'] = ctp
3174 return query
3175
@staticmethod
def _extract_next_continuation_data(renderer):
    """Build a continuation query from ``continuations[0].nextContinuationData``.

    Returns None when that dict or its ``continuation`` token is missing.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = next_data.get('continuation') if next_data else None
    if not token:
        return
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 3187
@classmethod
def _extract_continuation(cls, renderer):
    """Find the continuation query for ``renderer``.

    ``nextContinuationData`` is tried first. Failing that, the items under
    ``contents`` and ``items`` are scanned for the first
    ``continuationItemRenderer`` whose endpoint carries a command token.
    Returns None when neither source yields a token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    candidates = []
    for list_key in ('contents', 'items'):
        candidates += try_get(renderer, lambda x: x[list_key], list) or []

    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
448830ce 3210
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of a tab, following continuation pages.

    The first batch comes from the tab's ``sectionListRenderer`` or
    ``richGridRenderer``. Further pages are requested through
    ``_extract_response`` until no continuation is found, the response is
    empty, or the response has no renderer this method knows.

    @param tab              selected tab renderer dict
    @param item_id          id used in the per-page download notes
    @param identity_token   passed through to ``_generate_api_headers``
    @param account_syncid   passed through to ``_generate_api_headers``
    @param ytcfg            ytcfg dict used for context and headers
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list so the nested generator can hand back the continuation
    # it found (Python 2 does not support nonlocal)
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # 'itct' is only present when the continuation carried click-tracking
        # params, so it must not be indexed unconditionally
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset before iterating: extract_entries writes into this list
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the raw item list under the key the chosen handler reads
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3330
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab whose ``selected`` flag is True.

    Both ``tabRenderer`` and ``expandableTabRenderer`` are considered.
    Raises ExtractorError when no tab is selected.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3339
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar.

    Reads the video owner of each ``playlistSidebarSecondaryInfoRenderer``
    and returns a dict with ``uploader``, ``uploader_id`` and
    ``uploader_url``. Keys whose value is None are left out.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        base_url = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        info['uploader'] = owner.get('text')
        info['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        info['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return {key: value for key, value in info.items() if value is not None}
8bdd16b4 3362
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for the selected tab of a tabbed page.

    Metadata is taken from ``channelMetadataRenderer`` when present,
    otherwise from ``playlistMetadataRenderer``. Entries come from
    ``_entries`` for the selected tab.

    @param item_id  fallback playlist id, also used when fetching tokens/ytcfg
    @param webpage  page source the identity token and ytcfg are read from
    @param data     initial data dict of the page
    @param tabs     list of tab dicts; one of them must be selected
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    # Both names start out bound to the same list; each is only rebound below
    thumbnails_list = tags = []

    selected_tab = self._extract_selected_tab(tabs)
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Stays None for playlist metadata, so item_id is used further down
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only thumbnails that are dicts with a valid URL
    thumbnails = []
    for t in thumbnails_list:
        if not isinstance(t, dict):
            continue
        thumbnail_url = url_or_none(t.get('url'))
        if not thumbnail_url:
            continue
        thumbnails.append({
            'url': thumbnail_url,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        })
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        # Hashtag header text if there is one, else the id
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    # Append the selected tab's title / expanded text to the playlist title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata: fall back to the owner shown in the sidebar
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the uploader_* fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id,
            self._extract_identity_token(webpage, item_id),
            self._extract_account_syncid(data),
            self._extract_ytcfg(item_id, webpage)),
        **metadata)
73c4ac2c 3435
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a Mix playlist, paging through the 'next' endpoint.

    Each page is the playlist panel of a watch-next response. Videos up to
    and including the last one already yielded are skipped. Iteration ends
    when a page has no (new) videos, when the first video shows up again,
    or when no further playlist panel can be read.

    @param playlist     playlist panel dict of the first page
    @param playlist_id  id of the Mix, sent with every page request
    @param data         initial data dict, used for the account sync id
    @param webpage      page source the identity token and ytcfg are read from
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        # The next-page lookup at the bottom of the loop may come back empty;
        # stop cleanly instead of indexing into None
        if not try_get(playlist, lambda x: x['contents'], list):
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last panel item may not be a playlistPanelVideoRenderer; fall
        # back to an empty endpoint so the defaults below apply
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 3474
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Build the result for a playlist panel found on a watch page.

    When the panel links to a playlist URL different from ``url``, the
    result is a redirect to that URL handled by ``YoutubeTabIE``.
    Otherwise the panel is extracted as a Mix playlist.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3492
@staticmethod
def _extract_alerts(data):
    """Yield ``(alert_type, message)`` for every alert in ``data['alerts']``.

    The message is the alert's ``text.simpleText`` if present, otherwise the
    concatenation of the ``text`` of its ``text.runs``. Each alert is
    yielded at most once. Alerts without a type or without any message text
    are skipped, as are malformed (non-dict) alert containers and alerts.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if not message:
                runs = try_get(alert, lambda x: x['text']['runs'], list) or []
                # A run without a string 'text' contributes nothing rather
                # than breaking the concatenation
                message = ''.join(
                    try_get(run, lambda x: x['text'], compat_str) or ''
                    for run in runs)
            if message:
                yield alert_type, message
3509
3510 def _report_alerts(self, alerts, expected=True):
3ffc7c89 3511 errors = []
3512 warnings = []
95c01b6c 3513 for alert_type, alert_message in alerts:
f3eaa8dd 3514 if alert_type.lower() == 'error':
3ffc7c89 3515 errors.append([alert_type, alert_message])
f3eaa8dd 3516 else:
3ffc7c89 3517 warnings.append([alert_type, alert_message])
f3eaa8dd 3518
3ffc7c89 3519 for alert_type, alert_message in (warnings + errors[:-1]):
6a39ee13 3520 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
3ffc7c89 3521 if errors:
3522 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3523
95c01b6c 3524 def _extract_and_report_alerts(self, data, *args, **kwargs):
3525 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
3526
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again, this time including unavailable videos.

    The browse id and params are taken from the sidebar's
    'show unavailable videos' menu item when it exists; otherwise default
    values derived from item_id are used. Returns None when the page has
    no playlist sidebar. The request is non-fatal.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return

    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        primary_info = sidebar_item.get('playlistSidebarPrimaryInfoRenderer')
        menu_items = try_get(
            primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if not isinstance(menu_item, dict):
                continue
            nav_item = menu_item.get('menuNavigationItemRenderer')
            label = try_get(
                nav_item, lambda x: x['text']['simpleText'], compat_str)
            if label and label.lower() == 'show unavailable videos':
                endpoint = try_get(
                    nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
                browse_id = endpoint.get('browseId')
                params = endpoint.get('params')
                # Only stops scanning this menu; remaining sidebar items are still visited
                break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=visitor_data)
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3570
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint ``ep`` with retries and return the response.

    A request is retried, up to the 'extractor_retries' param (default 3),
    on HTTP 500/503/404 and when the response has none of
    ``check_get_keys``. Alerts in the response are reported through
    ``_extract_and_report_alerts``.

    @param item_id         id passed to ``_call_api`` as video_id
    @param query           request query dict
    @param note            download note; a retry counter is appended
    @param headers         request headers
    @param ytcfg           ytcfg dict the context and API key are taken from
    @param check_get_keys  key(s) of which at least one must be present
                           (with a truthy value) for the response to count
                           as complete; no check when empty/None
    @param ep              API endpoint name
    @param fatal           when False, failures are reported as warnings and
                           None is returned instead of raising
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if count < retries:
                    continue
            # Not retryable, or out of retries
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
3624
def _extract_webpage(self, url, item_id):
    """Download ``url`` and return ``(webpage, data)``.

    ``data`` is the page's initial data. The download is retried, up to the
    'extractor_retries' param (default 3), while the data has neither
    ``contents`` nor ``currentVideoEndpoint``. Alerts found in incomplete
    data are reported (and may raise). Raises ExtractorError when the data
    is still incomplete after the last retry.
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self._extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
3646
9297939e 3647 @staticmethod
3648 def _smuggle_data(entries, data):
3649 for entry in entries:
3650 if data:
3651 entry['url'] = smuggle_url(entry['url'], data)
3652 yield entry
3653
def _real_extract(self, url):
    """Extract ``url``, carrying smuggled data over to the resulting entries.

    Music URLs are flagged in the smuggled data before extraction. When
    the result has entries, the same smuggled data is attached to each
    entry URL.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
3662
fe03a6cd 3663 _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
3664
def __real_extract(self, url, smuggled_data):
    """Extract a channel tab, playlist or watch-with-playlist URL.

    The URL is first normalized (host, tab name, music URLs, channel home
    to '/videos'), then the page is downloaded and dispatched to tab,
    playlist-panel or single-video handling.

    @param url            URL to extract, already unsmuggled
    @param smuggled_data  dict of smuggled data; 'is_music_url' is read
    Raises ExtractorError when the page cannot be recognized.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match groups as a dict, with unmatched groups as '' instead of None
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            # Compare case-insensitively, as _report_alerts does
                            if alert_type.lower() == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    # data may have been replaced above, so look the tabs up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3780
c5e8d7af 3781
class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist ids and playlist URLs and hand them to YoutubeTabIE."""
    IE_DESC = 'YouTube.com playlists'
    # Verbose regex: an optional youtube / youtubekids / invidio.us URL prefix
    # ending in "list=", followed by the playlist id
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs YoutubeTabIE accepts or that carry a video id ('v')."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the URL to a youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string if there is one; a bare id gets 'list=<id>'
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 3864
3865
class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be links that carry a playlist id into watch URLs for YoutubeTabIE."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent youtube.com/watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3904
3905
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shortcut into a ``/user/<name>`` URL."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user page built from the matched id."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3919
b05654f0 3920
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` family of shortcuts onto the ``LL`` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=LL`` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3938
3939
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor behind the ``ytsearch`` keyword."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _SEARCH_KEY = 'ytsearch'
    # Optional value sent as ``params`` with every search request
    _SEARCH_PARAMS = None
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _TESTS = []

    def _entries(self, query, n):
        """Yield video results for ``query``, stopping once ``n`` were produced.

        Result pages are requested one after another; each following page is
        requested with the continuation token found in the previous one.
        Iteration also ends when a page is empty, has no recognisable result
        list, or carries no continuation token.
        """
        payload = {'query': query}
        if self._SEARCH_PARAMS:
            payload['params'] = self._SEARCH_PARAMS

        yielded = 0
        for page_num in itertools.count(1):
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=payload,
                check_get_keys=('contents', 'onResponseReceivedCommands'))
            if not response:
                return

            # First page and continuation pages keep the sections in different places
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    if not isinstance(item, dict):
                        continue
                    renderer = item.get('videoRenderer')
                    # Skip anything that is not a video with an id
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            payload['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 4009
c9ae7b95 4010
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE behind the ``ytsearchdate`` keyword."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the ``params`` value of the search request by the parent class;
    # presumably what selects the newest-first ordering named in IE_DESC
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4016
c9ae7b95 4017
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search taken from a ``youtube.com/results`` URL."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return ``_VALID_URL`` unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query and optional ``sp`` value from ``url`` and run the search.

        The ``sp`` value (or an empty string when absent) is stored on the
        instance as ``_SEARCH_PARAMS`` before the search is started.
        """
        url_params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = url_params.get('search_query') or url_params.get('q')
        query = terms[0]
        self._SEARCH_PARAMS = url_params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4043
4044
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from ``_FEED_NAME``."""
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``/feed/<_FEED_NAME>`` URL."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4061
4062
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` shortcut onto the ``WL`` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=WL`` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4075
4076
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``; matches the bare site URL and ``:ytrec``."""

    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Overrides the base class, which sets this to True
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4092
1ed5b5c9 4093
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions``; matches the ``:ytsubs`` shortcuts."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4105
1ed5b5c9 4106
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history``; matches ``:ythis`` and ``:ythistory``."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4115
4116
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their ``v=`` parameter and explain why.

    The pattern only matches URLs that end right after ``watch?`` (optionally
    followed by one of a few non-video parameters) or after an
    ``attribution_link?a=...`` value, i.e. URLs with nothing left to extract.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with a hint about quoting the URL.

        Raises:
            ExtractorError: unconditionally, marked as expected so it is
                reported as a user error.
        """
        # The suggested command names this program (yt-dlp), so that the
        # example can be copied as-is. The space before the final period
        # keeps the period from being read as part of the video id.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4164
4165
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is 1 to 10 characters long and ends the URL."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail, reporting the incomplete id and the offending URL.

        Raises:
            ExtractorError: unconditionally, marked as expected.
        """
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)