]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[cleanup] Revert unnecessary changes in 51d9739f8031fb37d8e25b0e9f1abea561e3d2e3
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Return the query-string parameters of *url* as parsed by compat_urlparse.parse_qs."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

        Note: username+password login is currently disabled; after the
        cookie check and the optional warning the method returns None and
        the legacy login flow below it is not executed.
        """

        def warn(message):
            self.report_warning(message)

        # username+password login is broken
        if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')
        username, password = self._get_login_info()
        if username:
            warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
        # NOTE(review): this return is taken to be unconditional, as the
        # comment below implies - confirm against the upstream file
        return
        # Everything below this is broken!

        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the login form plus the JSON-encoded f_req to url and return the parsed JSON (non-fatal)."""
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything that precedes the first '[' of the response
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised so the prefix is kept for every message, not
            # only for the INCORRECT_ANSWER_ENTERED case
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same reason as the login message above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _initialize_consent(self):
        """Set a 'YES' CONSENT cookie for .youtube.com unless a session or consent cookie already exists."""
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            # Reuse the numeric id of a pending consent cookie when there is one
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Initialize the consent cookie and, when a downloader is attached, attempt to log in."""
        self._initialize_consent()
        if self._downloader is None:
            return
        if not self._login():
            return

    _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
    _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """
        Build the 'SAPISIDHASH <time>_<sha1>' authorization value from the
        YouTube cookies, or return None when no SAPISID-type cookie exists.
        """
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        yt_cookies = self._get_cookies('https://www.youtube.com')
        sapisid_cookie = dict_get(
            yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        # SAPISID cookie is required if not already present
        if not yt_cookies.get('SAPISID'):
            self._set_cookie(
                '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {sapisid_cookie.value} https://www.youtube.com'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None):
        """
        POST a JSON request to the youtubei/v1 endpoint `ep` and return the parsed JSON.

        The request body is {'context': ...} updated with `query`; a default
        context and API key are used when `context` / `api_key` are not given.
        `headers`, if given, override the generated API headers.
        """

        data = {'context': context} if context else {'context': self._extract_context()}
        data.update(query)
        real_headers = self._generate_api_headers()
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def _extract_api_key(self, ytcfg=None):
        """Return INNERTUBE_API_KEY from ytcfg, falling back to _YT_INNERTUBE_API_KEY."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY

    def _extract_yt_initial_data(self, video_id, webpage):
        """Find the ytInitialData JSON object in webpage and return it parsed."""
        return self._parse_json(
            self._search_regex(
                # Try the pattern anchored by a trailing boundary first, then the bare one
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return ID_TOKEN from the page's ytcfg, else from a regex over webpage, else None."""
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(data):
        """
        Extract syncId required to download private playlists of secondary channels
        @param data Either response or ytcfg
        """
        sync_ids = (try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]
        # Nothing to look up in a missing/empty container (avoids
        # AttributeError when data is None)
        if not data:
            return None
        # ytcfg includes channel_syncid if on secondary channel
        return data.get('DELEGATED_SESSION_ID')

    def _extract_ytcfg(self, video_id, webpage):
        """Return the object passed to ytcfg.set(...) in webpage as a dict ({} if absent or unparsable)."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def __extract_client_version(self, ytcfg):
        """Return INNERTUBE_CLIENT_VERSION from ytcfg, falling back to _YT_WEB_CLIENT_VERSION."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION

    def _extract_context(self, ytcfg=None):
        """Return INNERTUBE_CONTEXT from ytcfg, or build a minimal client context when it is missing."""
        context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
        if context:
            return context

        # Recreate the client context (required)
        client_version = self.__extract_client_version(ytcfg)
        client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
        context = {
            'client': {
                'clientName': client_name,
                'clientVersion': client_version,
            }
        }
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
        """
        Build the HTTP headers for an API request.

        Client name/version are always set; the identity token, account
        sync id, visitor data and SAPISIDHASH authorization headers are
        added only when the corresponding value is available.
        """
        headers = {
            'X-YouTube-Client-Name': '1',
            'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
            headers['X-Goog-AuthUser'] = 0
        if visitor_data:
            headers['x-goog-visitor-id'] = visitor_data
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = 'https://www.youtube.com'
        return headers

    @staticmethod
    def is_music_url(url):
        """Return True if url starts with http(s)://music.youtube.com/."""
        return re.match(r'https?://music\.youtube\.com/', url) is not None

    def _extract_video(self, renderer):
        """Convert a video renderer dict into a 'url'-type result that is handed to YoutubeIE."""
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is stripped first so the leading "digits and commas" run can be matched
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
0c148415 457
360e1ca5 458class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 459 IE_DESC = 'YouTube.com'
bc2ca1bb 460 _INVIDIOUS_SITES = (
461 # invidious-redirect websites
462 r'(?:www\.)?redirect\.invidious\.io',
463 r'(?:(?:www|dev)\.)?invidio\.us',
464 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
465 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 466 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 467 r'(?:(?:www|au)\.)?ytprivate\.com',
468 r'(?:www\.)?invidious\.namazso\.eu',
469 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 470 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
471 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
472 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
473 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
474 # youtube-dl invidious instances list
475 r'(?:(?:www|no)\.)?invidiou\.sh',
476 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
477 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 478 r'(?:www\.)?invidious\.mastodon\.host',
479 r'(?:www\.)?invidious\.zapashcanon\.fr',
480 r'(?:www\.)?invidious\.kavin\.rocks',
201c1459 481 r'(?:www\.)?invidious\.tinfoil-hat\.net',
482 r'(?:www\.)?invidious\.himiko\.cloud',
483 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 484 r'(?:www\.)?invidious\.tube',
485 r'(?:www\.)?invidiou\.site',
486 r'(?:www\.)?invidious\.site',
487 r'(?:www\.)?invidious\.xyz',
488 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 489 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 490 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 491 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 492 r'(?:www\.)?tube\.poal\.co',
493 r'(?:www\.)?tube\.connect\.cafe',
494 r'(?:www\.)?vid\.wxzm\.sx',
495 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 496 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 497 r'(?:www\.)?yewtu\.be',
498 r'(?:www\.)?yt\.elukerio\.org',
499 r'(?:www\.)?yt\.lelux\.fi',
500 r'(?:www\.)?invidious\.ggc-project\.de',
501 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 502 r'(?:www\.)?ytprivate\.com',
503 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 504 r'(?:www\.)?invidious\.toot\.koeln',
505 r'(?:www\.)?invidious\.fdn\.fr',
506 r'(?:www\.)?watch\.nettohikari\.com',
507 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
508 r'(?:www\.)?qklhadlycap4cnod\.onion',
509 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
510 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
511 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
512 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
513 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
514 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
515 )
cb7dfeea 516 _VALID_URL = r"""(?x)^
c5e8d7af 517 (
edb53e2d 518 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 519 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
520 (?:www\.)?deturl\.com/www\.youtube\.com|
521 (?:www\.)?pwnyoutube\.com|
522 (?:www\.)?hooktube\.com|
523 (?:www\.)?yourepeat\.com|
524 tube\.majestyc\.net|
525 %(invidious)s|
526 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
527 (?:.*?\#/)? # handle anchor (#/) redirect urls
528 (?: # the various things that can precede the ID:
ac7553d0 529 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 530 |(?: # or the v= param in all its forms
f7000f3a 531 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 532 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 533 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
534 v=
535 )
f4b05232 536 ))
cbaed4bb
S
537 |(?:
538 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
539 vid\.plus| # or vid.plus/xxxx
540 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 541 %(invidious)s
cbaed4bb 542 )/
edb53e2d 543 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 544 )
c5e8d7af 545 )? # all until now is optional -> you can pass the naked ID
201c1459 546 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 547 (?(1).+)? # if we found the ID, everything can follow
9297939e 548 (?:\#|$)""" % {
bc2ca1bb 549 'invidious': '|'.join(_INVIDIOUS_SITES),
550 }
e40c758c 551 _PLAYER_INFO_RE = (
cc2db878 552 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
553 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 554 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 555 )
2c62dc26 556 _formats = {
c2d3cb4c 557 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
558 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
559 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
560 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
561 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
562 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
563 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
564 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 565 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 566 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
567 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
568 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
569 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
570 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
571 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 572 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 573 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
574 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 575
576
577 # 3D videos
c2d3cb4c 578 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
579 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
580 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
581 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 582 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
583 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
584 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 585
96fb5605 586 # Apple HTTP Live Streaming
11f12195 587 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 588 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
589 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
590 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
591 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
592 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 593 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
594 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
595
596 # DASH mp4 video
d23028a8
S
597 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
598 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
599 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
600 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
601 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 602 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
603 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
604 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
605 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
606 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
607 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
608 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 609
f6f1fc92 610 # Dash mp4 audio
d23028a8
S
611 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
612 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
613 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
614 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
615 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
616 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
617 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
618
619 # Dash webm
d23028a8
S
620 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
621 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
622 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
623 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
624 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
625 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
626 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
627 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
628 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
629 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
630 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
631 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
632 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
633 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
634 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 635 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
636 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
637 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
638 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
639 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
640 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
641 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
642
643 # Dash webm audio
d23028a8
S
644 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
645 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 646
0857baad 647 # Dash webm audio with opus inside
d23028a8
S
648 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
649 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
650 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 651
ce6b9a2d
PH
652 # RTMP (unnamed)
653 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
654
655 # av01 video only formats sometimes served with "unknown" codecs
656 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
657 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
658 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
659 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 660 }
29f7c58a 661 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 662
fd5c4aab
S
663 _GEO_BYPASS = False
664
78caa52a 665 IE_NAME = 'youtube'
2eb88d95
PH
666 _TESTS = [
667 {
2d3d2997 668 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
669 'info_dict': {
670 'id': 'BaW_jenozKc',
671 'ext': 'mp4',
3867038a 672 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
673 'uploader': 'Philipp Hagemeister',
674 'uploader_id': 'phihag',
ec85ded8 675 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
676 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
677 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 678 'upload_date': '20121002',
3867038a 679 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 680 'categories': ['Science & Technology'],
3867038a 681 'tags': ['youtube-dl'],
556dbe7f 682 'duration': 10,
dbdaaa23 683 'view_count': int,
3e7c1224
PH
684 'like_count': int,
685 'dislike_count': int,
7c80519c 686 'start_time': 1,
297a564b 687 'end_time': 9,
2eb88d95 688 }
0e853ca4 689 },
fccd3771 690 {
4bc3a23e
PH
691 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
692 'note': 'Embed-only video (#1746)',
693 'info_dict': {
694 'id': 'yZIXLfi8CZQ',
695 'ext': 'mp4',
696 'upload_date': '20120608',
697 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
698 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
699 'uploader': 'SET India',
94bfcd23 700 'uploader_id': 'setindia',
ec85ded8 701 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 702 'age_limit': 18,
545cc85d 703 },
704 'skip': 'Private video',
fccd3771 705 },
11b56058 706 {
8bdd16b4 707 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
708 'note': 'Use the first video ID in the URL',
709 'info_dict': {
710 'id': 'BaW_jenozKc',
711 'ext': 'mp4',
3867038a 712 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
713 'uploader': 'Philipp Hagemeister',
714 'uploader_id': 'phihag',
ec85ded8 715 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 716 'upload_date': '20121002',
3867038a 717 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 718 'categories': ['Science & Technology'],
3867038a 719 'tags': ['youtube-dl'],
556dbe7f 720 'duration': 10,
dbdaaa23 721 'view_count': int,
11b56058
PM
722 'like_count': int,
723 'dislike_count': int,
34a7de29
S
724 },
725 'params': {
726 'skip_download': True,
727 },
11b56058 728 },
dd27fd17 729 {
2d3d2997 730 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
731 'note': '256k DASH audio (format 141) via DASH manifest',
732 'info_dict': {
733 'id': 'a9LDPn-MO4I',
734 'ext': 'm4a',
735 'upload_date': '20121002',
736 'uploader_id': '8KVIDEO',
ec85ded8 737 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
738 'description': '',
739 'uploader': '8KVIDEO',
740 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 741 },
4bc3a23e
PH
742 'params': {
743 'youtube_include_dash_manifest': True,
744 'format': '141',
4919603f 745 },
de3c7fe0 746 'skip': 'format 141 not served anymore',
dd27fd17 747 },
8bdd16b4 748 # DASH manifest with encrypted signature
749 {
750 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
751 'info_dict': {
752 'id': 'IB3lcPjvWLA',
753 'ext': 'm4a',
754 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
755 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
756 'duration': 244,
757 'uploader': 'AfrojackVEVO',
758 'uploader_id': 'AfrojackVEVO',
759 'upload_date': '20131011',
cc2db878 760 'abr': 129.495,
8bdd16b4 761 },
762 'params': {
763 'youtube_include_dash_manifest': True,
764 'format': '141/bestaudio[ext=m4a]',
765 },
766 },
aa79ac0c
PH
767 # Controversy video
768 {
769 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
770 'info_dict': {
771 'id': 'T4XJQO3qol8',
772 'ext': 'mp4',
556dbe7f 773 'duration': 219,
aa79ac0c 774 'upload_date': '20100909',
4fe54c12 775 'uploader': 'Amazing Atheist',
aa79ac0c 776 'uploader_id': 'TheAmazingAtheist',
ec85ded8 777 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 778 'title': 'Burning Everyone\'s Koran',
545cc85d 779 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 780 }
c522adb1 781 },
dd2d55f1 782 # Normal age-gate video (embed allowed)
c522adb1 783 {
2d3d2997 784 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
785 'info_dict': {
786 'id': 'HtVdAasjOgU',
787 'ext': 'mp4',
788 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 789 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 790 'duration': 142,
c522adb1
JMF
791 'uploader': 'The Witcher',
792 'uploader_id': 'WitcherGame',
ec85ded8 793 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 794 'upload_date': '20140605',
34952f09 795 'age_limit': 18,
c522adb1
JMF
796 },
797 },
8bdd16b4 798 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
799 # YouTube Red ad is not captured for creator
800 {
801 'url': '__2ABJjxzNo',
802 'info_dict': {
803 'id': '__2ABJjxzNo',
804 'ext': 'mp4',
805 'duration': 266,
806 'upload_date': '20100430',
807 'uploader_id': 'deadmau5',
808 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 809 'creator': 'deadmau5',
810 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 811 'uploader': 'deadmau5',
812 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 813 'alt_title': 'Some Chords',
8bdd16b4 814 },
815 'expected_warnings': [
816 'DASH manifest missing',
817 ]
818 },
067aa17e 819 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
820 {
821 'url': 'lqQg6PlCWgI',
822 'info_dict': {
823 'id': 'lqQg6PlCWgI',
824 'ext': 'mp4',
556dbe7f 825 'duration': 6085,
90227264 826 'upload_date': '20150827',
cbe2bd91 827 'uploader_id': 'olympic',
ec85ded8 828 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 829 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 830 'uploader': 'Olympic',
cbe2bd91
PH
831 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
832 },
833 'params': {
834 'skip_download': 'requires avconv',
e52a40ab 835 }
cbe2bd91 836 },
6271f1ca
PH
837 # Non-square pixels
838 {
839 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
840 'info_dict': {
841 'id': '_b-2C3KPAM0',
842 'ext': 'mp4',
843 'stretched_ratio': 16 / 9.,
556dbe7f 844 'duration': 85,
6271f1ca
PH
845 'upload_date': '20110310',
846 'uploader_id': 'AllenMeow',
ec85ded8 847 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 848 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 849 'uploader': '孫ᄋᄅ',
6271f1ca
PH
850 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
851 },
06b491eb
S
852 },
853 # url_encoded_fmt_stream_map is empty string
854 {
855 'url': 'qEJwOuvDf7I',
856 'info_dict': {
857 'id': 'qEJwOuvDf7I',
f57b7835 858 'ext': 'webm',
06b491eb
S
859 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
860 'description': '',
861 'upload_date': '20150404',
862 'uploader_id': 'spbelect',
863 'uploader': 'Наблюдатели Петербурга',
864 },
865 'params': {
866 'skip_download': 'requires avconv',
e323cf3f
S
867 },
868 'skip': 'This live event has ended.',
06b491eb 869 },
067aa17e 870 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
871 {
872 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
873 'info_dict': {
874 'id': 'FIl7x6_3R5Y',
eb6793ba 875 'ext': 'webm',
da77d856
S
876 'title': 'md5:7b81415841e02ecd4313668cde88737a',
877 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 878 'duration': 220,
da77d856
S
879 'upload_date': '20150625',
880 'uploader_id': 'dorappi2000',
ec85ded8 881 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 882 'uploader': 'dorappi2000',
eb6793ba 883 'formats': 'mincount:31',
da77d856 884 },
eb6793ba 885 'skip': 'not actual anymore',
2ee8f5d8 886 },
8a1a26ce
YCH
887 # DASH manifest with segment_list
888 {
889 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
890 'md5': '8ce563a1d667b599d21064e982ab9e31',
891 'info_dict': {
892 'id': 'CsmdDsKjzN8',
893 'ext': 'mp4',
17ee98e1 894 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
895 'uploader': 'Airtek',
896 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
897 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
898 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
899 },
900 'params': {
901 'youtube_include_dash_manifest': True,
902 'format': '135', # bestvideo
be49068d
S
903 },
904 'skip': 'This live event has ended.',
2ee8f5d8 905 },
cf7e015f
S
906 {
907 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 908 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 909 'info_dict': {
545cc85d 910 'id': 'jvGDaLqkpTg',
911 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
912 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
913 },
914 'playlist': [{
915 'info_dict': {
545cc85d 916 'id': 'jvGDaLqkpTg',
cf7e015f 917 'ext': 'mp4',
545cc85d 918 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
919 'description': 'md5:e03b909557865076822aa169218d6a5d',
920 'duration': 10643,
921 'upload_date': '20161111',
922 'uploader': 'Team PGP',
923 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
924 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
925 },
926 }, {
927 'info_dict': {
545cc85d 928 'id': '3AKt1R1aDnw',
cf7e015f 929 'ext': 'mp4',
545cc85d 930 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
931 'description': 'md5:e03b909557865076822aa169218d6a5d',
932 'duration': 10991,
933 'upload_date': '20161111',
934 'uploader': 'Team PGP',
935 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
936 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
937 },
938 }, {
939 'info_dict': {
545cc85d 940 'id': 'RtAMM00gpVc',
cf7e015f 941 'ext': 'mp4',
545cc85d 942 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
943 'description': 'md5:e03b909557865076822aa169218d6a5d',
944 'duration': 10995,
945 'upload_date': '20161111',
946 'uploader': 'Team PGP',
947 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
948 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
949 },
950 }, {
951 'info_dict': {
545cc85d 952 'id': '6N2fdlP3C5U',
cf7e015f 953 'ext': 'mp4',
545cc85d 954 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
955 'description': 'md5:e03b909557865076822aa169218d6a5d',
956 'duration': 10990,
957 'upload_date': '20161111',
958 'uploader': 'Team PGP',
959 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
960 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
961 },
962 }],
963 'params': {
964 'skip_download': True,
965 },
cbaed4bb 966 },
f9f49d87 967 {
067aa17e 968 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
969 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
970 'info_dict': {
971 'id': 'gVfLd0zydlo',
972 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
973 },
974 'playlist_count': 2,
be49068d 975 'skip': 'Not multifeed anymore',
f9f49d87 976 },
cbaed4bb 977 {
2d3d2997 978 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 979 'only_matching': True,
0e49d9a6 980 },
6d4fc66b 981 {
2d3d2997 982 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
983 'only_matching': True,
984 },
0e49d9a6 985 {
067aa17e 986 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 987 # Also tests cut-off URL expansion in video description (see
067aa17e
S
988 # https://github.com/ytdl-org/youtube-dl/issues/1892,
989 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
990 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
991 'info_dict': {
992 'id': 'lsguqyKfVQg',
993 'ext': 'mp4',
994 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 995 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 996 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 997 'duration': 133,
0e49d9a6
LL
998 'upload_date': '20151119',
999 'uploader_id': 'IronSoulElf',
ec85ded8 1000 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1001 'uploader': 'IronSoulElf',
eb6793ba
S
1002 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1003 'track': 'Dark Walk - Position Music',
1004 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 1005 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1006 },
1007 'params': {
1008 'skip_download': True,
1009 },
1010 },
61f92af1 1011 {
067aa17e 1012 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1013 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1014 'only_matching': True,
1015 },
313dfc45
LL
1016 {
1017 # Video with yt:stretch=17:0
1018 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1019 'info_dict': {
1020 'id': 'Q39EVAstoRM',
1021 'ext': 'mp4',
1022 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1023 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1024 'upload_date': '20151107',
1025 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1026 'uploader': 'CH GAMER DROID',
1027 },
1028 'params': {
1029 'skip_download': True,
1030 },
be49068d 1031 'skip': 'This video does not exist.',
313dfc45 1032 },
201c1459 1033 {
1034 # Video with incomplete 'yt:stretch=16:'
1035 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1036 'only_matching': True,
1037 },
7caf9830
S
1038 {
1039 # Video licensed under Creative Commons
1040 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1041 'info_dict': {
1042 'id': 'M4gD1WSo5mA',
1043 'ext': 'mp4',
1044 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1045 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1046 'duration': 721,
7caf9830
S
1047 'upload_date': '20150127',
1048 'uploader_id': 'BerkmanCenter',
ec85ded8 1049 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1050 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1051 'license': 'Creative Commons Attribution license (reuse allowed)',
1052 },
1053 'params': {
1054 'skip_download': True,
1055 },
1056 },
fd050249
S
1057 {
1058 # Channel-like uploader_url
1059 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1060 'info_dict': {
1061 'id': 'eQcmzGIKrzg',
1062 'ext': 'mp4',
1063 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1064 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1065 'duration': 4060,
fd050249 1066 'upload_date': '20151119',
eb6793ba 1067 'uploader': 'Bernie Sanders',
fd050249 1068 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1069 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1070 'license': 'Creative Commons Attribution license (reuse allowed)',
1071 },
1072 'params': {
1073 'skip_download': True,
1074 },
1075 },
040ac686
S
1076 {
1077 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1078 'only_matching': True,
7f29cf54
S
1079 },
1080 {
067aa17e 1081 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1082 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1083 'only_matching': True,
6496ccb4
S
1084 },
1085 {
1086 # Rental video preview
1087 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1088 'info_dict': {
1089 'id': 'uGpuVWrhIzE',
1090 'ext': 'mp4',
1091 'title': 'Piku - Trailer',
1092 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1093 'upload_date': '20150811',
1094 'uploader': 'FlixMatrix',
1095 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1096 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1097 'license': 'Standard YouTube License',
1098 },
1099 'params': {
1100 'skip_download': True,
1101 },
eb6793ba 1102 'skip': 'This video is not available.',
022a5d66 1103 },
12afdc2a
S
1104 {
1105 # YouTube Red video with episode data
1106 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1107 'info_dict': {
1108 'id': 'iqKdEhx-dD4',
1109 'ext': 'mp4',
1110 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1111 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1112 'duration': 2085,
12afdc2a
S
1113 'upload_date': '20170118',
1114 'uploader': 'Vsauce',
1115 'uploader_id': 'Vsauce',
1116 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1117 'series': 'Mind Field',
1118 'season_number': 1,
1119 'episode_number': 1,
1120 },
1121 'params': {
1122 'skip_download': True,
1123 },
1124 'expected_warnings': [
1125 'Skipping DASH manifest',
1126 ],
1127 },
c7121fa7
S
1128 {
1129 # The following content has been identified by the YouTube community
1130 # as inappropriate or offensive to some audiences.
1131 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1132 'info_dict': {
1133 'id': '6SJNVb0GnPI',
1134 'ext': 'mp4',
1135 'title': 'Race Differences in Intelligence',
1136 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1137 'duration': 965,
1138 'upload_date': '20140124',
1139 'uploader': 'New Century Foundation',
1140 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1141 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1142 },
1143 'params': {
1144 'skip_download': True,
1145 },
545cc85d 1146 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1147 },
022a5d66
S
1148 {
1149 # itag 212
1150 'url': '1t24XAntNCY',
1151 'only_matching': True,
fd5c4aab
S
1152 },
1153 {
1154 # geo restricted to JP
1155 'url': 'sJL6WA-aGkQ',
1156 'only_matching': True,
1157 },
cd5a74a2
S
1158 {
1159 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1160 'only_matching': True,
1161 },
bc2ca1bb 1162 {
1163 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1164 'only_matching': True,
1165 },
1166 {
1167 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1168 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1169 'only_matching': True,
1170 },
825cd268
RA
1171 {
1172 # DRM protected
1173 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1174 'only_matching': True,
4fe54c12
S
1175 },
1176 {
1177 # Video with unsupported adaptive stream type formats
1178 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1179 'info_dict': {
1180 'id': 'Z4Vy8R84T1U',
1181 'ext': 'mp4',
1182 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1183 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1184 'duration': 433,
1185 'upload_date': '20130923',
1186 'uploader': 'Amelia Putri Harwita',
1187 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1188 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1189 'formats': 'maxcount:10',
1190 },
1191 'params': {
1192 'skip_download': True,
1193 'youtube_include_dash_manifest': False,
1194 },
5429d6a9 1195 'skip': 'not actual anymore',
5caabd3c 1196 },
1197 {
822b9d9c 1198 # Youtube Music Auto-generated description
5caabd3c 1199 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1200 'info_dict': {
1201 'id': 'MgNrAu2pzNs',
1202 'ext': 'mp4',
1203 'title': 'Voyeur Girl',
1204 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1205 'upload_date': '20190312',
5429d6a9
S
1206 'uploader': 'Stephen - Topic',
1207 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1208 'artist': 'Stephen',
1209 'track': 'Voyeur Girl',
1210 'album': 'it\'s too much love to know my dear',
1211 'release_date': '20190313',
1212 'release_year': 2019,
1213 },
1214 'params': {
1215 'skip_download': True,
1216 },
1217 },
66b48727
RA
1218 {
1219 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1220 'only_matching': True,
1221 },
011e75e6
S
1222 {
1223 # invalid -> valid video id redirection
1224 'url': 'DJztXj2GPfl',
1225 'info_dict': {
1226 'id': 'DJztXj2GPfk',
1227 'ext': 'mp4',
1228 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1229 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1230 'upload_date': '20090125',
1231 'uploader': 'Prochorowka',
1232 'uploader_id': 'Prochorowka',
1233 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1234 'artist': 'Panjabi MC',
1235 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1236 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1237 },
1238 'params': {
1239 'skip_download': True,
1240 },
545cc85d 1241 'skip': 'Video unavailable',
ea74e00b
DP
1242 },
1243 {
1244 # empty description results in an empty string
1245 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1246 'info_dict': {
1247 'id': 'x41yOUIvK2k',
1248 'ext': 'mp4',
1249 'title': 'IMG 3456',
1250 'description': '',
1251 'upload_date': '20170613',
1252 'uploader_id': 'ElevageOrVert',
1253 'uploader': 'ElevageOrVert',
1254 },
1255 'params': {
1256 'skip_download': True,
1257 },
1258 },
a0566bbf 1259 {
29f7c58a 1260 # with '};' inside yt initial data (see [1])
1261 # see [2] for an example with '};' inside ytInitialPlayerResponse
1262 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1263 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1264 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1265 'info_dict': {
1266 'id': 'CHqg6qOn4no',
1267 'ext': 'mp4',
1268 'title': 'Part 77 Sort a list of simple types in c#',
1269 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1270 'upload_date': '20130831',
1271 'uploader_id': 'kudvenkat',
1272 'uploader': 'kudvenkat',
1273 },
1274 'params': {
1275 'skip_download': True,
1276 },
1277 },
29f7c58a 1278 {
1279 # another example of '};' in ytInitialData
1280 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1281 'only_matching': True,
1282 },
1283 {
1284 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1285 'only_matching': True,
1286 },
{
    # https://github.com/ytdl-org/youtube-dl/pull/28094
    'url': 'OtqTfy26tG0',
    'info_dict': {
        'id': 'OtqTfy26tG0',
        'ext': 'mp4',
        'title': 'Burn Out',
        'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
        'upload_date': '20141120',
        'uploader': 'The Cinematic Orchestra - Topic',
        'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
        'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
        'artist': 'The Cinematic Orchestra',
        'track': 'Burn Out',
        'album': 'Every Day',
        # Neither a release date nor a release year is expected here.
        'release_date': None,
        'release_year': None,
    },
    'params': {
        'skip_download': True,
    },
},
bc2ca1bb 1309 {
1310 # controversial video, only works with bpctr when authenticated with cookies
1311 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1312 'only_matching': True,
1313 },
f7ad7160 1314 {
1315 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1316 'url': 'cBvYw8_A0vQ',
1317 'info_dict': {
1318 'id': 'cBvYw8_A0vQ',
1319 'ext': 'mp4',
1320 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1321 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1322 'upload_date': '20201120',
1323 'uploader': 'Walk around Japan',
1324 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1325 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1326 },
1327 'params': {
1328 'skip_download': True,
1329 },
0fb983f6 1330 }, {
1331 # Has multiple audio streams
1332 'url': 'WaOKSUlf4TM',
1333 'only_matching': True
9297939e 1334 }, {
1335 # Requires Premium: has format 141 when requested using YTM url
1336 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1337 'only_matching': True
1338 }, {
120916da 1339 # multiple subtitles with same lang_code
1340 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1341 'only_matching': True,
1342 },
2eb88d95
PH
1343 ]
1344
@classmethod
def suitable(cls, url):
    """Tell whether this extractor accepts *url*.

    URLs whose query string carries a non-empty ``list`` parameter are
    rejected outright; everything else is decided by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_param = parse_qs(url).get('list', [None])[0]
    return False if list_param else super(YoutubeIE, cls).suitable(url)
1354
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the base extractor and create the two empty caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # player id -> downloaded player code
    self._code_cache = dict()
    # (player URL, signature cache id) -> signature function
    self._player_cache = dict()
e0df6211 1359
60064c53
PH
1360 def _signature_cache_id(self, example_sig):
1361 """ Return a string representation of a signature """
78caa52a 1362 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 1363
e40c758c
S
1364 @classmethod
1365 def _extract_player_info(cls, player_url):
1366 for player_re in cls._PLAYER_INFO_RE:
1367 id_m = re.search(player_re, player_url)
1368 if id_m:
1369 break
1370 else:
c081b35c 1371 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1372 return id_m.group('id')
e40c758c
S
1373
1374 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1375 player_id = self._extract_player_info(player_url)
e0df6211 1376
c4417ddb 1377 # Read from filesystem cache
545cc85d 1378 func_id = 'js_%s_%s' % (
1379 player_id, self._signature_cache_id(example_sig))
c4417ddb 1380 assert os.path.basename(func_id) == func_id
a0e07d31 1381
69ea8ca4 1382 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1383 if cache_spec is not None:
78caa52a 1384 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1385
545cc85d 1386 if player_id not in self._code_cache:
1387 self._code_cache[player_id] = self._download_webpage(
e0df6211 1388 player_url, video_id,
545cc85d 1389 note='Downloading player ' + player_id,
69ea8ca4 1390 errnote='Download of %s failed' % player_url)
545cc85d 1391 code = self._code_cache[player_id]
1392 res = self._parse_sig_js(code)
e0df6211 1393
785521bf
PH
1394 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1395 cache_res = res(test_string)
1396 cache_spec = [ord(c) for c in cache_res]
83799698 1397
69ea8ca4 1398 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1399 return res
1400
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    *func* is applied to a test string of distinct characters as long as
    *example_sig*; the resulting index permutation is rendered as a sum of
    ``s[...]`` index/slice expressions and written out with ``to_screen``.
    """
    def gen_sig_code(idxs):
        # Yields 's[...]' snippets covering idxs in order; runs of indexes
        # that go up or down by one are collapsed into a single slice.
        def _genslice(start, end, step):
            # Render the inclusive run start..end as slice text.  The start
            # is omitted when it is 0, the step when it is 1, and the stop
            # when end + step would be negative.
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # step is None while no run is open, otherwise the run's +1/-1 step
        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # The open run continues
                    continue
                # The run ended at prev; emit it and start afresh
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # prev opens a new run
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending after the last pair.
        # NOTE(review): when idxs has fewer than two items the loop above
        # never runs and `i` is unbound at this point.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Character k of the test string has code point k, so the code points
    # of the output are the source indexes.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    # Lengths of the dot-separated parts, rendered as a tuple literal
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1439
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a signature function from the player code *jscode*.

    The name of the signature function is located with the first of the
    regular expressions below that matches (named group ``sig``).  That
    function is then extracted with JSInterpreter.

    Returns a callable taking the signature string and returning the
    result of the extracted function called with it as sole argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
1463
545cc85d 1464 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1465 """Turn the encrypted s field into a working signature"""
6b37f0be 1466
c8bf86d5 1467 if player_url is None:
69ea8ca4 1468 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1469
69ea8ca4 1470 if player_url.startswith('//'):
78caa52a 1471 player_url = 'https:' + player_url
3c90cc8b
S
1472 elif not re.match(r'https?://', player_url):
1473 player_url = compat_urlparse.urljoin(
1474 'https://www.youtube.com', player_url)
c8bf86d5 1475 try:
62af3a0e 1476 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1477 if player_id not in self._player_cache:
1478 func = self._extract_signature_function(
60064c53 1479 video_id, player_url, s
c8bf86d5
PH
1480 )
1481 self._player_cache[player_id] = func
1482 func = self._player_cache[player_id]
a06916d9 1483 if self.get_param('youtube_print_sig_code'):
60064c53 1484 self._print_sig_code(func, s)
c8bf86d5
PH
1485 return func(s)
1486 except Exception as e:
1487 tb = traceback.format_exc()
1488 raise ExtractorError(
78caa52a 1489 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1490
def _mark_watched(self, video_id, player_response):
    """Request the playback tracking URL found in *player_response*.

    Returns without doing anything when the response holds no valid URL at
    playbackTracking -> videostatsPlaybackUrl -> baseUrl.  Otherwise the
    ``ver`` and ``cpn`` query parameters are set and the URL is fetched
    non-fatally.
    """
    tracking_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not tracking_url:
        return

    url_parts = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]

    # parse_qs values are lists, hence the second argument to urlencode
    encoded_query = compat_urllib_parse_urlencode(query, True)
    self._download_webpage(
        compat_urlparse.urlunparse(url_parts._replace(query=encoded_query)),
        video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1515
66c9fa36
S
1516 @staticmethod
1517 def _extract_urls(webpage):
1518 # Embedded YouTube player
1519 entries = [
1520 unescapeHTML(mobj.group('url'))
1521 for mobj in re.finditer(r'''(?x)
1522 (?:
1523 <iframe[^>]+?src=|
1524 data-video-url=|
1525 <embed[^>]+?src=|
1526 embedSWF\(?:\s*|
1527 <object[^>]+data=|
1528 new\s+SWFObject\(
1529 )
1530 (["\'])
1531 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
f2332f18 1532 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
66c9fa36
S
1533 \1''', webpage)]
1534
1535 # lazyYT YouTube embed
1536 entries.extend(list(map(
1537 unescapeHTML,
1538 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1539
1540 # Wordpress "YouTube Video Importer" plugin
1541 matches = re.findall(r'''(?x)<div[^>]+
1542 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1543 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1544 entries.extend(m[-1] for m in matches)
1545
1546 return entries
1547
@staticmethod
def _extract_url(webpage):
    """Return the first YouTube embed found in ``webpage``, or None."""
    candidates = YoutubeIE._extract_urls(webpage)
    if not candidates:
        return None
    return candidates[0]
1552
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video ID (second group of ``cls._VALID_URL``) for ``url``.

    Raises ExtractorError when ``url`` does not match ``cls._VALID_URL``.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1560
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build the chapter list from the chaptered player bar in ``data``.

    Returns a list of dicts with 'start_time', 'end_time' and 'title', or
    None when ``data`` has no (non-empty) chapter list. A chapter ends where
    the next one starts; the last chapter ends at ``duration``. Chapters
    whose start or end time cannot be determined are left out.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_seconds(item):
        # The start offset is given in milliseconds; convert to seconds
        millis = try_get(
            item,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for index, item in enumerate(raw_chapters):
        begin = start_seconds(item)
        if begin is None:
            continue
        if index + 1 < total:
            finish = start_seconds(raw_chapters[index + 1])
        else:
            finish = duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                item, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1600
545cc85d 1601 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1602 return self._parse_json(self._search_regex(
1603 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1604 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1605
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns whatever datetime_from_str() yields for 'now-<X><units>', or
    None when time_text is empty/None or has fewer than three
    space-separated parts.
    """
    # The time text may be missing altogether; do not crash on None
    if not time_text:
        return None
    parts = time_text.split(' ')
    if len(parts) < 3:
        return None
    return datetime_from_str('now-%s%s' % (parts[0], parts[1]), precision='auto')
1615
a1c5d2ca
M
1616 @staticmethod
1617 def _join_text_entries(runs):
1618 text = None
1619 for run in runs:
1620 if not isinstance(run, dict):
1621 continue
1622 sub_text = try_get(run, lambda x: x['text'], compat_str)
1623 if sub_text:
1624 if not text:
1625 text = sub_text
1626 continue
1627 text += sub_text
1628 return text
1629
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'. ``parent`` is the ID
    of the parent comment; 'root' is used for top-level comments.
    'timestamp' is None when the published time text is missing or cannot
    be turned into a date.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)
    # The time text may be absent, and parse_time_text() returns None for
    # texts it does not understand; neither case should abort extraction.
    timestamp = None
    if time_text:
        published = self.parse_time_text(time_text)
        if published is not None:
            timestamp = calendar.timegm(published.timetuple())
    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                  lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_liked,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
1662
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generator over the comments reachable from ``root_continuation_data``.

    Yields comment dicts (see _extract_comment) and, once for the initial
    top-level page, an int with the estimated total number of comments.
    Replies are fetched recursively with ``parent`` set to the ID of the
    comment they belong to.

    ``session_token_list`` is a one-element list holding the current XSRF
    token; it is updated in place whenever a response carries a new token.
    ``comment_counts`` is a shared, mutable
    [comments so far, estimated total, current reply thread number] list.

    Raises ExtractorError when a page cannot be downloaded after all
    retries or when YouTube reports an error.
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type here. It used to be passed inside
            # the getter tuple, which made try_get call dict(...) as a
            # fallback getter instead of type-checking the result.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self.get_param('extractor_retries', 3)
        count = -1
        last_error = None

        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    # Presumably str_to_int() yields None for text it cannot
                    # parse; skip the estimate then rather than fail on '%d'
                    estimated_total = str_to_int(expected_comment_count)
                    if estimated_total is not None:
                        comment_counts[1] = estimated_total
                        self.to_screen('Downloading ~%d comments' % estimated_total)
                        yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
        else:
            # No known continuation renderer in the response: the
            # continuation would stay unchanged and the very same page would
            # be requested over and over again, so stop here.
            break
1847
1848 def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
1849 """Entry for comment extraction"""
1850 comments = []
1851 known_entry_comment_renderers = (
1852 'itemSectionRenderer',
1853 )
1854 estimated_total = 0
1855 for entry in contents:
1856 for key, renderer in entry.items():
1857 if key not in known_entry_comment_renderers:
1858 continue
1859
1860 comment_iter = self._comment_entries(
1861 renderer,
1862 identity_token=self._extract_identity_token(webpage, item_id=video_id),
1863 account_syncid=self._extract_account_syncid(ytcfg),
f4f751af 1864 ytcfg=ytcfg,
a1c5d2ca
M
1865 session_token_list=[xsrf_token])
1866
1867 for comment in comment_iter:
1868 if isinstance(comment, int):
1869 estimated_total = comment
1870 continue
1871 comments.append(comment)
1872 break
d92f5d5a 1873 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
a1c5d2ca
M
1874 return {
1875 'comments': comments,
1876 'comment_count': len(comments),
1877 }
1878
4e6767b5 1879 @staticmethod
1880 def _get_video_info_params(video_id):
1881 return {
1882 'video_id': video_id,
1883 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1884 'html5': '1',
1885 'c': 'TVHTML5',
1886 'cver': '6.20180913',
1887 }
1888
c5e8d7af 1889 def _real_extract(self, url):
cf7e015f 1890 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1891 video_id = self._match_id(url)
9297939e 1892
1893 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
1894
545cc85d 1895 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1896 webpage_url = base_url + 'watch?v=' + video_id
1897 webpage = self._download_webpage(
cce889b9 1898 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1899
9297939e 1900 def get_text(x):
1901 if not x:
1902 return
1903 text = x.get('simpleText')
1904 if text and isinstance(text, compat_str):
1905 return text
1906 runs = x.get('runs')
1907 if not isinstance(runs, list):
1908 return
1909 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
1910
1911 ytm_streaming_data = {}
1912 if is_music_url:
1913 # we are forcing to use parse_json because 141 only appeared in get_video_info.
1914 # el, c, cver, cplayer field required for 141(aac 256kbps) codec
1915 # maybe paramter of youtube music player?
1916 ytm_player_response = self._parse_json(try_get(compat_parse_qs(
1917 self._download_webpage(
1918 base_url + 'get_video_info', video_id,
fe03a6cd 1919 'Fetching youtube music info webpage',
1920 'unable to download youtube music info webpage', query={
4e6767b5 1921 **self._get_video_info_params(video_id),
9297939e 1922 'el': 'detailpage',
1923 'c': 'WEB_REMIX',
1924 'cver': '0.1',
00ae2769 1925 'cplayer': 'UNIPLAYER',
9297939e 1926 }, fatal=False)),
1927 lambda x: x['player_response'][0],
1928 compat_str) or '{}', video_id)
1929 ytm_streaming_data = ytm_player_response.get('streamingData') or {}
1930
545cc85d 1931 player_response = None
1932 if webpage:
1933 player_response = self._extract_yt_initial_variable(
1934 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1935 video_id, 'initial player response')
f4f751af 1936
1937 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1938 if not player_response:
1939 player_response = self._call_api(
f4f751af 1940 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1941
1942 playability_status = player_response.get('playabilityStatus') or {}
1943 if playability_status.get('reason') == 'Sign in to confirm your age':
1944 pr = self._parse_json(try_get(compat_parse_qs(
1945 self._download_webpage(
1946 base_url + 'get_video_info', video_id,
4e6767b5 1947 'Refetching age-gated info webpage', 'unable to download video info webpage',
1948 query=self._get_video_info_params(video_id), fatal=False)),
545cc85d 1949 lambda x: x['player_response'][0],
1950 compat_str) or '{}', video_id)
1951 if pr:
1952 player_response = pr
1953
1954 trailer_video_id = try_get(
1955 playability_status,
1956 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1957 compat_str)
1958 if trailer_video_id:
1959 return self.url_result(
1960 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1961
545cc85d 1962 search_meta = (
1963 lambda x: self._html_search_meta(x, webpage, default=None)) \
1964 if webpage else lambda x: None
dbdaaa23 1965
545cc85d 1966 video_details = player_response.get('videoDetails') or {}
37357d21 1967 microformat = try_get(
545cc85d 1968 player_response,
1969 lambda x: x['microformat']['playerMicroformatRenderer'],
1970 dict) or {}
1971 video_title = video_details.get('title') \
1972 or get_text(microformat.get('title')) \
1973 or search_meta(['og:title', 'twitter:title', 'title'])
1974 video_description = video_details.get('shortDescription')
cf7e015f 1975
8fe10494 1976 if not smuggled_data.get('force_singlefeed', False):
a06916d9 1977 if not self.get_param('noplaylist'):
8fe10494
S
1978 multifeed_metadata_list = try_get(
1979 player_response,
1980 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1981 compat_str)
8fe10494
S
1982 if multifeed_metadata_list:
1983 entries = []
1984 feed_ids = []
1985 for feed in multifeed_metadata_list.split(','):
1986 # Unquote should take place before split on comma (,) since textual
1987 # fields may contain comma as well (see
067aa17e 1988 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1989 feed_data = compat_parse_qs(
1990 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1991
1992 def feed_entry(name):
545cc85d 1993 return try_get(
1994 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1995
1996 feed_id = feed_entry('id')
1997 if not feed_id:
1998 continue
1999 feed_title = feed_entry('title')
2000 title = video_title
2001 if feed_title:
2002 title += ' (%s)' % feed_title
8fe10494
S
2003 entries.append({
2004 '_type': 'url_transparent',
2005 'ie_key': 'Youtube',
2006 'url': smuggle_url(
545cc85d 2007 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 2008 {'force_singlefeed': True}),
6b09401b 2009 'title': title,
8fe10494 2010 })
6b09401b 2011 feed_ids.append(feed_id)
8fe10494
S
2012 self.to_screen(
2013 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2014 % (', '.join(feed_ids), video_id))
545cc85d 2015 return self.playlist_result(
2016 entries, video_id, video_title, video_description)
8fe10494
S
2017 else:
2018 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2019
9297939e 2020 formats, itags, stream_ids = [], [], []
cc2db878 2021 itag_qualities = {}
545cc85d 2022 player_url = None
d3fc8074 2023 q = qualities([
2024 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2025 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2026 ])
9297939e 2027
545cc85d 2028 streaming_data = player_response.get('streamingData') or {}
2029 streaming_formats = streaming_data.get('formats') or []
2030 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2031 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2032 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2033
545cc85d 2034 for fmt in streaming_formats:
2035 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2036 continue
321bf820 2037
cc2db878 2038 itag = str_or_none(fmt.get('itag'))
9297939e 2039 audio_track = fmt.get('audioTrack') or {}
2040 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2041 if stream_id in stream_ids:
2042 continue
2043
cc2db878 2044 quality = fmt.get('quality')
d3fc8074 2045 if quality == 'tiny' or not quality:
2046 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2047 if itag and quality:
2048 itag_qualities[itag] = quality
2049 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2050 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2051 # number of fragment that would subsequently requested with (`&sq=N`)
2052 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2053 continue
2054
545cc85d 2055 fmt_url = fmt.get('url')
2056 if not fmt_url:
2057 sc = compat_parse_qs(fmt.get('signatureCipher'))
2058 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2059 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2060 if not (sc and fmt_url and encrypted_sig):
2061 continue
2062 if not player_url:
2063 if not webpage:
2064 continue
2065 player_url = self._search_regex(
2066 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
2067 webpage, 'player URL', fatal=False)
2068 if not player_url:
201e9eaa 2069 continue
545cc85d 2070 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2071 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2072 fmt_url += '&' + sp + '=' + signature
2073
545cc85d 2074 if itag:
2075 itags.append(itag)
9297939e 2076 stream_ids.append(stream_id)
2077
cc2db878 2078 tbr = float_or_none(
2079 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2080 dct = {
2081 'asr': int_or_none(fmt.get('audioSampleRate')),
2082 'filesize': int_or_none(fmt.get('contentLength')),
2083 'format_id': itag,
0fb983f6 2084 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2085 'fps': int_or_none(fmt.get('fps')),
2086 'height': int_or_none(fmt.get('height')),
dca3ff4a 2087 'quality': q(quality),
cc2db878 2088 'tbr': tbr,
545cc85d 2089 'url': fmt_url,
2090 'width': fmt.get('width'),
0fb983f6 2091 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2092 }
2093 mimetype = fmt.get('mimeType')
2094 if mimetype:
2095 mobj = re.match(
2096 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2097 if mobj:
2098 dct['ext'] = mimetype2ext(mobj.group(1))
2099 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2100 no_audio = dct.get('acodec') == 'none'
2101 no_video = dct.get('vcodec') == 'none'
2102 if no_audio:
2103 dct['vbr'] = tbr
2104 if no_video:
2105 dct['abr'] = tbr
2106 if no_audio or no_video:
545cc85d 2107 dct['downloader_options'] = {
2108 # Youtube throttles chunks >~10M
2109 'http_chunk_size': 10485760,
bf1317d2 2110 }
7c60c33e 2111 if dct.get('ext'):
2112 dct['container'] = dct['ext'] + '_dash'
545cc85d 2113 formats.append(dct)
2114
9297939e 2115 for sd in (streaming_data, ytm_streaming_data):
2116 hls_manifest_url = sd.get('hlsManifestUrl')
2117 if hls_manifest_url:
2118 for f in self._extract_m3u8_formats(
2119 hls_manifest_url, video_id, 'mp4', fatal=False):
2120 itag = self._search_regex(
2121 r'/itag/(\d+)', f['url'], 'itag', default=None)
2122 if itag:
2123 f['format_id'] = itag
8d68ab98 2124 formats.append(f)
545cc85d 2125
a06916d9 2126 if self.get_param('youtube_include_dash_manifest', True):
9297939e 2127 for sd in (streaming_data, ytm_streaming_data):
2128 dash_manifest_url = sd.get('dashManifestUrl')
2129 if dash_manifest_url:
2130 for f in self._extract_mpd_formats(
2131 dash_manifest_url, video_id, fatal=False):
2132 itag = f['format_id']
2133 if itag in itags:
2134 continue
2135 if itag in itag_qualities:
9297939e 2136 f['quality'] = q(itag_qualities[itag])
2137 filesize = int_or_none(self._search_regex(
2138 r'/clen/(\d+)', f.get('fragment_base_url')
2139 or f['url'], 'file size', default=None))
2140 if filesize:
2141 f['filesize'] = filesize
2142 formats.append(f)
bf1317d2 2143
545cc85d 2144 if not formats:
a06916d9 2145 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2146 self.raise_no_formats(
545cc85d 2147 'This video is DRM protected.', expected=True)
2148 pemr = try_get(
2149 playability_status,
2150 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2151 dict) or {}
2152 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2153 subreason = pemr.get('subreason')
2154 if subreason:
2155 subreason = clean_html(get_text(subreason))
2156 if subreason == 'The uploader has not made this video available in your country.':
2157 countries = microformat.get('availableCountries')
2158 if not countries:
2159 regions_allowed = search_meta('regionsAllowed')
2160 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2161 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2162 reason += '\n' + subreason
2163 if reason:
b7da73eb 2164 self.raise_no_formats(reason, expected=True)
bf1317d2 2165
545cc85d 2166 self._sort_formats(formats)
bf1317d2 2167
545cc85d 2168 keywords = video_details.get('keywords') or []
2169 if not keywords and webpage:
2170 keywords = [
2171 unescapeHTML(m.group('content'))
2172 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2173 for keyword in keywords:
2174 if keyword.startswith('yt:stretch='):
201c1459 2175 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2176 if mobj:
2177 # NB: float is intentional for forcing float division
2178 w, h = (float(v) for v in mobj.groups())
2179 if w > 0 and h > 0:
2180 ratio = w / h
2181 for f in formats:
2182 if f.get('vcodec') != 'none':
2183 f['stretched_ratio'] = ratio
2184 break
6449cd80 2185
545cc85d 2186 thumbnails = []
2187 for container in (video_details, microformat):
2188 for thumbnail in (try_get(
2189 container,
2190 lambda x: x['thumbnail']['thumbnails'], list) or []):
2191 thumbnail_url = thumbnail.get('url')
2192 if not thumbnail_url:
bf1317d2 2193 continue
1988fab7 2194 # Sometimes youtube gives a wrong thumbnail URL. See:
2195 # https://github.com/yt-dlp/yt-dlp/issues/233
2196 # https://github.com/ytdl-org/youtube-dl/issues/28023
2197 if 'maxresdefault' in thumbnail_url:
2198 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2199 thumbnails.append({
545cc85d 2200 'url': thumbnail_url,
ff2751ac 2201 'height': int_or_none(thumbnail.get('height')),
545cc85d 2202 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2203 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2204 })
ff2751ac 2205 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2206 if thumbnail_url:
2207 thumbnails.append({
2208 'url': thumbnail_url,
2209 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2210 })
2211 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2212 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2213 thumbnails.append({
2214 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2215 'preference': 1,
2216 })
2217 self._remove_duplicate_formats(thumbnails)
545cc85d 2218
2219 category = microformat.get('category') or search_meta('genre')
2220 channel_id = video_details.get('channelId') \
2221 or microformat.get('externalChannelId') \
2222 or search_meta('channelId')
2223 duration = int_or_none(
2224 video_details.get('lengthSeconds')
2225 or microformat.get('lengthSeconds')) \
2226 or parse_duration(search_meta('duration'))
2227 is_live = video_details.get('isLive')
2228 owner_profile_url = microformat.get('ownerProfileUrl')
2229
2230 info = {
2231 'id': video_id,
2232 'title': self._live_title(video_title) if is_live else video_title,
2233 'formats': formats,
2234 'thumbnails': thumbnails,
2235 'description': video_description,
2236 'upload_date': unified_strdate(
2237 microformat.get('uploadDate')
2238 or search_meta('uploadDate')),
2239 'uploader': video_details['author'],
2240 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2241 'uploader_url': owner_profile_url,
2242 'channel_id': channel_id,
2243 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2244 'duration': duration,
2245 'view_count': int_or_none(
2246 video_details.get('viewCount')
2247 or microformat.get('viewCount')
2248 or search_meta('interactionCount')),
2249 'average_rating': float_or_none(video_details.get('averageRating')),
2250 'age_limit': 18 if (
2251 microformat.get('isFamilySafe') is False
2252 or search_meta('isFamilyFriendly') == 'false'
2253 or search_meta('og:restrictions:age') == '18+') else 0,
2254 'webpage_url': webpage_url,
2255 'categories': [category] if category else None,
2256 'tags': keywords,
2257 'is_live': is_live,
2258 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2259 'was_live': video_details.get('isLiveContent'),
545cc85d 2260 }
b477fc13 2261
545cc85d 2262 pctr = try_get(
2263 player_response,
2264 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2265 subtitles = {}
2266 if pctr:
774d79cc 2267 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2268 lang_subs = container.setdefault(lang_code, [])
545cc85d 2269 for fmt in self._SUBTITLE_FORMATS:
2270 query.update({
2271 'fmt': fmt,
2272 })
2273 lang_subs.append({
2274 'ext': fmt,
2275 'url': update_url_query(base_url, query),
774d79cc 2276 'name': sub_name,
545cc85d 2277 })
7e72694b 2278
545cc85d 2279 for caption_track in (pctr.get('captionTracks') or []):
2280 base_url = caption_track.get('baseUrl')
2281 if not base_url:
2282 continue
2283 if caption_track.get('kind') != 'asr':
120916da 2284 lang_code = (
2285 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2286 or caption_track.get('languageCode'))
545cc85d 2287 if not lang_code:
2288 continue
2289 process_language(
774d79cc 2290 subtitles, base_url, lang_code,
2291 try_get(caption_track, lambda x: x.get('name').get('simpleText')),
2292 {})
545cc85d 2293 continue
2294 automatic_captions = {}
2295 for translation_language in (pctr.get('translationLanguages') or []):
2296 translation_language_code = translation_language.get('languageCode')
2297 if not translation_language_code:
2298 continue
2299 process_language(
2300 automatic_captions, base_url, translation_language_code,
774d79cc 2301 try_get(translation_language, lambda x: x['languageName']['simpleText']),
545cc85d 2302 {'tlang': translation_language_code})
2303 info['automatic_captions'] = automatic_captions
2304 info['subtitles'] = subtitles
7e72694b 2305
545cc85d 2306 parsed_url = compat_urllib_parse_urlparse(url)
2307 for component in [parsed_url.fragment, parsed_url.query]:
2308 query = compat_parse_qs(component)
2309 for k, v in query.items():
2310 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2311 d_k += '_time'
2312 if d_k not in info and k in s_ks:
2313 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2314
2315 # Youtube Music Auto-generated description
822b9d9c 2316 if video_description:
38d70284 2317 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2318 if mobj:
822b9d9c
RA
2319 release_year = mobj.group('release_year')
2320 release_date = mobj.group('release_date')
2321 if release_date:
2322 release_date = release_date.replace('-', '')
2323 if not release_year:
545cc85d 2324 release_year = release_date[:4]
2325 info.update({
2326 'album': mobj.group('album'.strip()),
2327 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2328 'track': mobj.group('track').strip(),
2329 'release_date': release_date,
cc2db878 2330 'release_year': int_or_none(release_year),
545cc85d 2331 })
7e72694b 2332
545cc85d 2333 initial_data = None
2334 if webpage:
2335 initial_data = self._extract_yt_initial_variable(
2336 webpage, self._YT_INITIAL_DATA_RE, video_id,
2337 'yt initial data')
2338 if not initial_data:
2339 initial_data = self._call_api(
f4f751af 2340 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2341
2342 if not is_live:
2343 try:
2344 # This will error if there is no livechat
2345 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2346 info['subtitles']['live_chat'] = [{
394dcd44 2347 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2348 'video_id': video_id,
2349 'ext': 'json',
2350 'protocol': 'youtube_live_chat_replay',
2351 }]
2352 except (KeyError, IndexError, TypeError):
2353 pass
2354
2355 if initial_data:
2356 chapters = self._extract_chapters_from_json(
2357 initial_data, video_id, duration)
2358 if not chapters:
2359 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2360 contents = try_get(
2361 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2362 list)
2363 if not contents:
2364 continue
2365
2366 def chapter_time(mmlir):
2367 return parse_duration(
2368 get_text(mmlir.get('timeDescription')))
2369
2370 chapters = []
2371 for next_num, content in enumerate(contents, start=1):
2372 mmlir = content.get('macroMarkersListItemRenderer') or {}
2373 start_time = chapter_time(mmlir)
2374 end_time = chapter_time(try_get(
2375 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2376 if next_num < len(contents) else duration
2377 if start_time is None or end_time is None:
2378 continue
2379 chapters.append({
2380 'start_time': start_time,
2381 'end_time': end_time,
2382 'title': get_text(mmlir.get('title')),
2383 })
2384 if chapters:
2385 break
2386 if chapters:
2387 info['chapters'] = chapters
2388
2389 contents = try_get(
2390 initial_data,
2391 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2392 list) or []
2393 for content in contents:
2394 vpir = content.get('videoPrimaryInfoRenderer')
2395 if vpir:
2396 stl = vpir.get('superTitleLink')
2397 if stl:
2398 stl = get_text(stl)
2399 if try_get(
2400 vpir,
2401 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2402 info['location'] = stl
2403 else:
2404 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2405 if mobj:
2406 info.update({
2407 'series': mobj.group(1),
2408 'season_number': int(mobj.group(2)),
2409 'episode_number': int(mobj.group(3)),
2410 })
2411 for tlb in (try_get(
2412 vpir,
2413 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2414 list) or []):
2415 tbr = tlb.get('toggleButtonRenderer') or {}
2416 for getter, regex in [(
2417 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2418 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2419 lambda x: x['accessibility'],
2420 lambda x: x['accessibilityData']['accessibilityData'],
2421 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2422 label = (try_get(tbr, getter, dict) or {}).get('label')
2423 if label:
2424 mobj = re.match(regex, label)
2425 if mobj:
2426 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2427 break
2428 sbr_tooltip = try_get(
2429 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2430 if sbr_tooltip:
2431 like_count, dislike_count = sbr_tooltip.split(' / ')
2432 info.update({
2433 'like_count': str_to_int(like_count),
2434 'dislike_count': str_to_int(dislike_count),
2435 })
2436 vsir = content.get('videoSecondaryInfoRenderer')
2437 if vsir:
2438 info['channel'] = get_text(try_get(
2439 vsir,
2440 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2441 dict))
545cc85d 2442 rows = try_get(
2443 vsir,
2444 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2445 list) or []
2446 multiple_songs = False
2447 for row in rows:
2448 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2449 multiple_songs = True
2450 break
2451 for row in rows:
2452 mrr = row.get('metadataRowRenderer') or {}
2453 mrr_title = mrr.get('title')
2454 if not mrr_title:
2455 continue
2456 mrr_title = get_text(mrr['title'])
2457 mrr_contents_text = get_text(mrr['contents'][0])
2458 if mrr_title == 'License':
2459 info['license'] = mrr_contents_text
2460 elif not multiple_songs:
2461 if mrr_title == 'Album':
2462 info['album'] = mrr_contents_text
2463 elif mrr_title == 'Artist':
2464 info['artist'] = mrr_contents_text
2465 elif mrr_title == 'Song':
2466 info['track'] = mrr_contents_text
2467
2468 fallbacks = {
2469 'channel': 'uploader',
2470 'channel_id': 'uploader_id',
2471 'channel_url': 'uploader_url',
2472 }
2473 for to, frm in fallbacks.items():
2474 if not info.get(to):
2475 info[to] = info.get(frm)
2476
2477 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2478 v = info.get(s_k)
2479 if v:
2480 info[d_k] = v
b84071c0 2481
c224251a
M
2482 is_private = bool_or_none(video_details.get('isPrivate'))
2483 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2484 is_membersonly = None
b28f8d24 2485 is_premium = None
c224251a
M
2486 if initial_data and is_private is not None:
2487 is_membersonly = False
b28f8d24 2488 is_premium = False
c224251a
M
2489 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2490 for content in contents or []:
2491 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2492 for badge in badges or []:
2493 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2494 if label.lower() == 'members only':
2495 is_membersonly = True
2496 break
b28f8d24
M
2497 elif label.lower() == 'premium':
2498 is_premium = True
2499 break
2500 if is_membersonly or is_premium:
c224251a
M
2501 break
2502
2503 # TODO: Add this for playlists
2504 info['availability'] = self._availability(
2505 is_private=is_private,
b28f8d24 2506 needs_premium=is_premium,
c224251a
M
2507 needs_subscription=is_membersonly,
2508 needs_auth=info['age_limit'] >= 18,
2509 is_unlisted=None if is_private is None else is_unlisted)
2510
06167fbb 2511 # get xsrf for annotations or comments
a06916d9 2512 get_annotations = self.get_param('writeannotations', False)
2513 get_comments = self.get_param('getcomments', False)
06167fbb 2514 if get_annotations or get_comments:
29f7c58a 2515 xsrf_token = None
545cc85d 2516 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2517 if ytcfg:
2518 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2519 if not xsrf_token:
2520 xsrf_token = self._search_regex(
2521 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2522 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2523
2524 # annotations
06167fbb 2525 if get_annotations:
64b6a4e9
RA
2526 invideo_url = try_get(
2527 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2528 if xsrf_token and invideo_url:
29f7c58a 2529 xsrf_field_name = None
2530 if ytcfg:
2531 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2532 if not xsrf_field_name:
2533 xsrf_field_name = self._search_regex(
2534 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2535 webpage, 'xsrf field name',
29f7c58a 2536 group='xsrf_field_name', default='session_token')
8a784c74 2537 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2538 self._proto_relative_url(invideo_url),
2539 video_id, note='Downloading annotations',
2540 errnote='Unable to download video annotations', fatal=False,
2541 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2542
277d6ff5 2543 if get_comments:
a1c5d2ca 2544 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2545
545cc85d 2546 self.mark_watched(video_id, player_response)
d77ab8e2 2547
545cc85d 2548 return info
c5e8d7af 2549
5f6a1245 2550
8bdd16b4 2551class YoutubeTabIE(YoutubeBaseInfoExtractor):
2552 IE_DESC = 'YouTube.com tab'
70d5c17b 2553 _VALID_URL = r'''(?x)
2554 https?://
2555 (?:\w+\.)?
2556 (?:
2557 youtube(?:kids)?\.com|
2558 invidio\.us
2559 )/
2560 (?:
fe03a6cd 2561 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 2562 (?P<not_channel>
9ba5705a 2563 feed/|hashtag/|
70d5c17b 2564 (?:playlist|watch)\?.*?\blist=
2565 )|
29f7c58a 2566 (?!(?:%s)\b) # Direct URLs
70d5c17b 2567 )
2568 (?P<id>[^/?\#&]+)
2569 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2570 IE_NAME = 'youtube:tab'
2571
81127aa5 2572 _TESTS = [{
da692b79 2573 'note': 'playlists, multipage',
8bdd16b4 2574 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2575 'playlist_mincount': 94,
2576 'info_dict': {
2577 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2578 'title': 'Игорь Клейнер - Playlists',
2579 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2580 'uploader': 'Игорь Клейнер',
2581 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2582 },
2583 }, {
da692b79 2584 'note': 'playlists, multipage, different order',
8bdd16b4 2585 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2586 'playlist_mincount': 94,
2587 'info_dict': {
2588 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2589 'title': 'Игорь Клейнер - Playlists',
2590 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2591 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2592 'uploader': 'Игорь Клейнер',
8bdd16b4 2593 },
201c1459 2594 }, {
da692b79 2595 'note': 'playlists, series',
201c1459 2596 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2597 'playlist_mincount': 5,
2598 'info_dict': {
2599 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2600 'title': '3Blue1Brown - Playlists',
2601 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 2602 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2603 'uploader': '3Blue1Brown',
201c1459 2604 },
8bdd16b4 2605 }, {
da692b79 2606 'note': 'playlists, singlepage',
8bdd16b4 2607 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2608 'playlist_mincount': 4,
2609 'info_dict': {
2610 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2611 'title': 'ThirstForScience - Playlists',
2612 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2613 'uploader': 'ThirstForScience',
2614 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2615 }
2616 }, {
2617 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2618 'only_matching': True,
2619 }, {
da692b79 2620 'note': 'basic, single video playlist',
0e30a7b9 2621 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2622 'info_dict': {
0e30a7b9 2623 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2624 'uploader': 'Sergey M.',
2625 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2626 'title': 'youtube-dl public playlist',
81127aa5 2627 },
0e30a7b9 2628 'playlist_count': 1,
9291475f 2629 }, {
da692b79 2630 'note': 'empty playlist',
0e30a7b9 2631 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2632 'info_dict': {
0e30a7b9 2633 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2634 'uploader': 'Sergey M.',
2635 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2636 'title': 'youtube-dl empty playlist',
9291475f
PH
2637 },
2638 'playlist_count': 0,
2639 }, {
da692b79 2640 'note': 'Home tab',
8bdd16b4 2641 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2642 'info_dict': {
8bdd16b4 2643 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2644 'title': 'lex will - Home',
2645 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2646 'uploader': 'lex will',
2647 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2648 },
8bdd16b4 2649 'playlist_mincount': 2,
9291475f 2650 }, {
da692b79 2651 'note': 'Videos tab',
8bdd16b4 2652 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2653 'info_dict': {
8bdd16b4 2654 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2655 'title': 'lex will - Videos',
2656 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2657 'uploader': 'lex will',
2658 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2659 },
8bdd16b4 2660 'playlist_mincount': 975,
9291475f 2661 }, {
da692b79 2662 'note': 'Videos tab, sorted by popular',
8bdd16b4 2663 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2664 'info_dict': {
8bdd16b4 2665 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2666 'title': 'lex will - Videos',
2667 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2668 'uploader': 'lex will',
2669 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2670 },
8bdd16b4 2671 'playlist_mincount': 199,
9291475f 2672 }, {
da692b79 2673 'note': 'Playlists tab',
8bdd16b4 2674 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2675 'info_dict': {
8bdd16b4 2676 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2677 'title': 'lex will - Playlists',
2678 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2679 'uploader': 'lex will',
2680 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2681 },
8bdd16b4 2682 'playlist_mincount': 17,
ac7553d0 2683 }, {
da692b79 2684 'note': 'Community tab',
8bdd16b4 2685 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2686 'info_dict': {
8bdd16b4 2687 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2688 'title': 'lex will - Community',
2689 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2690 'uploader': 'lex will',
2691 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2692 },
2693 'playlist_mincount': 18,
87dadd45 2694 }, {
da692b79 2695 'note': 'Channels tab',
8bdd16b4 2696 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2697 'info_dict': {
8bdd16b4 2698 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2699 'title': 'lex will - Channels',
2700 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2701 'uploader': 'lex will',
2702 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2703 },
deaec5af 2704 'playlist_mincount': 12,
cd684175 2705 }, {
2706 'note': 'Search tab',
2707 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
2708 'playlist_mincount': 40,
2709 'info_dict': {
2710 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2711 'title': '3Blue1Brown - Search - linear algebra',
2712 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2713 'uploader': '3Blue1Brown',
2714 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2715 },
6b08cdf6 2716 }, {
a0566bbf 2717 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2718 'only_matching': True,
2719 }, {
a0566bbf 2720 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2721 'only_matching': True,
2722 }, {
a0566bbf 2723 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2724 'only_matching': True,
2725 }, {
2726 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2727 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2728 'info_dict': {
2729 'title': '29C3: Not my department',
2730 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2731 'uploader': 'Christiaan008',
2732 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2733 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2734 },
2735 'playlist_count': 96,
2736 }, {
2737 'note': 'Large playlist',
2738 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2739 'info_dict': {
8bdd16b4 2740 'title': 'Uploads from Cauchemar',
2741 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2742 'uploader': 'Cauchemar',
2743 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2744 },
8bdd16b4 2745 'playlist_mincount': 1123,
2746 }, {
da692b79 2747 'note': 'even larger playlist, 8832 videos',
8bdd16b4 2748 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2749 'only_matching': True,
4b7df0d3
JMF
2750 }, {
2751 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2752 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2753 'info_dict': {
acf757f4
PH
2754 'title': 'Uploads from Interstellar Movie',
2755 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2756 'uploader': 'Interstellar Movie',
8bdd16b4 2757 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2758 },
481cc733 2759 'playlist_mincount': 21,
358de58c 2760 }, {
2761 'note': 'Playlist with "show unavailable videos" button',
2762 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2763 'info_dict': {
2764 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2765 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2766 'uploader': 'Phim Siêu Nhân Nhật Bản',
2767 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2768 },
da692b79 2769 'playlist_mincount': 200,
5d342002 2770 }, {
da692b79 2771 'note': 'Playlist with unavailable videos in page 7',
5d342002 2772 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2773 'info_dict': {
2774 'title': 'Uploads from BlankTV',
2775 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2776 'uploader': 'BlankTV',
2777 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2778 },
da692b79 2779 'playlist_mincount': 1000,
8bdd16b4 2780 }, {
da692b79 2781 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 2782 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2783 'info_dict': {
2784 'title': 'Data Analysis with Dr Mike Pound',
2785 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2786 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2787 'uploader': 'Computerphile',
deaec5af 2788 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2789 },
2790 'playlist_mincount': 11,
2791 }, {
a0566bbf 2792 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2793 'only_matching': True,
dacb3a86 2794 }, {
da692b79 2795 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
2796 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2797 'info_dict': {
2798 'id': 'FqZTN594JQw',
2799 'ext': 'webm',
2800 'title': "Smiley's People 01 detective, Adventure Series, Action",
2801 'uploader': 'STREEM',
2802 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2803 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2804 'upload_date': '20150526',
2805 'license': 'Standard YouTube License',
2806 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2807 'categories': ['People & Blogs'],
2808 'tags': list,
dbdaaa23 2809 'view_count': int,
dacb3a86
S
2810 'like_count': int,
2811 'dislike_count': int,
2812 },
2813 'params': {
2814 'skip_download': True,
2815 },
13a75688 2816 'skip': 'This video is not available.',
dacb3a86 2817 'add_ie': [YoutubeIE.ie_key()],
481cc733 2818 }, {
8bdd16b4 2819 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2820 'only_matching': True,
66b48727 2821 }, {
8bdd16b4 2822 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2823 'only_matching': True,
a0566bbf 2824 }, {
2825 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2826 'info_dict': {
da692b79 2827 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 2828 'ext': 'mp4',
deaec5af 2829 'title': compat_str,
a0566bbf 2830 'uploader': 'Sky News',
2831 'uploader_id': 'skynews',
2832 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 2833 'upload_date': r're:\d{8}',
2834 'description': compat_str,
a0566bbf 2835 'categories': ['News & Politics'],
2836 'tags': list,
2837 'like_count': int,
2838 'dislike_count': int,
2839 },
2840 'params': {
2841 'skip_download': True,
2842 },
da692b79 2843 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 2844 }, {
2845 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2846 'info_dict': {
2847 'id': 'a48o2S1cPoo',
2848 'ext': 'mp4',
2849 'title': 'The Young Turks - Live Main Show',
2850 'uploader': 'The Young Turks',
2851 'uploader_id': 'TheYoungTurks',
2852 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2853 'upload_date': '20150715',
2854 'license': 'Standard YouTube License',
2855 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2856 'categories': ['News & Politics'],
2857 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2858 'like_count': int,
2859 'dislike_count': int,
2860 },
2861 'params': {
2862 'skip_download': True,
2863 },
2864 'only_matching': True,
2865 }, {
2866 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2867 'only_matching': True,
2868 }, {
2869 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2870 'only_matching': True,
09f1580e 2871 }, {
2872 'note': 'A channel that is not live. Should raise error',
2873 'url': 'https://www.youtube.com/user/numberphile/live',
2874 'only_matching': True,
3d3dddc9 2875 }, {
2876 'url': 'https://www.youtube.com/feed/trending',
2877 'only_matching': True,
2878 }, {
3d3dddc9 2879 'url': 'https://www.youtube.com/feed/library',
2880 'only_matching': True,
2881 }, {
3d3dddc9 2882 'url': 'https://www.youtube.com/feed/history',
2883 'only_matching': True,
2884 }, {
3d3dddc9 2885 'url': 'https://www.youtube.com/feed/subscriptions',
2886 'only_matching': True,
2887 }, {
3d3dddc9 2888 'url': 'https://www.youtube.com/feed/watch_later',
2889 'only_matching': True,
2890 }, {
da692b79 2891 'note': 'Recommended - redirects to home page',
3d3dddc9 2892 'url': 'https://www.youtube.com/feed/recommended',
2893 'only_matching': True,
29f7c58a 2894 }, {
da692b79 2895 'note': 'inline playlist with not always working continuations',
29f7c58a 2896 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2897 'only_matching': True,
2898 }, {
2899 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2900 'only_matching': True,
2901 }, {
2902 'url': 'https://www.youtube.com/course',
2903 'only_matching': True,
2904 }, {
2905 'url': 'https://www.youtube.com/zsecurity',
2906 'only_matching': True,
2907 }, {
2908 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2909 'only_matching': True,
2910 }, {
2911 'url': 'https://www.youtube.com/TheYoungTurks/live',
2912 'only_matching': True,
39ed931e 2913 }, {
2914 'url': 'https://www.youtube.com/hashtag/cctv9',
2915 'info_dict': {
2916 'id': 'cctv9',
2917 'title': '#cctv9',
2918 },
2919 'playlist_mincount': 350,
201c1459 2920 }, {
2921 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2922 'only_matching': True,
9297939e 2923 }, {
da692b79 2924 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 2925 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2926 'only_matching': True
fe03a6cd 2927 }, {
2928 'note': '/browse/ should redirect to /channel/',
2929 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
2930 'only_matching': True
2931 }, {
2932 'note': 'VLPL, should redirect to playlist?list=PL...',
2933 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2934 'info_dict': {
2935 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2936 'uploader': 'NoCopyrightSounds',
2937 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
2938 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
2939 'title': 'NCS Releases',
2940 },
2941 'playlist_mincount': 166,
18db7548 2942 }, {
2943 'note': 'Topic, should redirect to playlist?list=UU...',
2944 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
2945 'info_dict': {
2946 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
2947 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
2948 'title': 'Uploads from Royalty Free Music - Topic',
2949 'uploader': 'Royalty Free Music - Topic',
2950 },
2951 'expected_warnings': [
2952 'A channel/user page was given',
2953 'The URL does not have a videos tab',
2954 ],
2955 'playlist_mincount': 101,
2956 }, {
2957 'note': 'Topic without a UU playlist',
2958 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
2959 'info_dict': {
2960 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
2961 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
2962 },
2963 'expected_warnings': [
2964 'A channel/user page was given',
2965 'The URL does not have a videos tab',
2966 'Falling back to channel URL',
2967 ],
2968 'playlist_mincount': 9,
abcdd12b 2969 }, {
2970 'note': 'Youtube music Album',
2971 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
2972 'info_dict': {
2973 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
2974 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
2975 },
2976 'playlist_count': 50,
29f7c58a 2977 }]
2978
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE accepts is rejected here; for every other URL
    the decision is delegated to the parent class's ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2983
def _extract_channel_id(self, webpage):
    """Return the channel ID advertised by *webpage*.

    The ``channelId`` meta tag is used when it is present. Otherwise a
    channel URL is looked up in a series of URL-carrying meta tags and the
    ID is taken from the path segment that follows ``/channel/``.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to sit inside the group: with `([^/?#&])+` the
    # group is overwritten on every repetition and ends up holding only the
    # last character of the ID.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 2996
8bdd16b4 2997 @staticmethod
cd7c66cf 2998 def _extract_basic_item_renderer(item):
2999 # Modified from _extract_grid_item_renderer
201c1459 3000 known_basic_renderers = (
3001 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3002 )
3003 for key, renderer in item.items():
201c1459 3004 if not isinstance(renderer, dict):
cd7c66cf 3005 continue
201c1459 3006 elif key in known_basic_renderers:
3007 return renderer
3008 elif key.startswith('grid') and key.endswith('Renderer'):
3009 return renderer
8bdd16b4 3010
8bdd16b4 3011 def _grid_entries(self, grid_renderer):
3012 for item in grid_renderer['items']:
3013 if not isinstance(item, dict):
39b62db1 3014 continue
cd7c66cf 3015 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3016 if not isinstance(renderer, dict):
3017 continue
3018 title = try_get(
201c1459 3019 renderer, (lambda x: x['title']['runs'][0]['text'],
3020 lambda x: x['title']['simpleText']), compat_str)
8bdd16b4 3021 # playlist
3022 playlist_id = renderer.get('playlistId')
3023 if playlist_id:
3024 yield self.url_result(
3025 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3026 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3027 video_title=title)
201c1459 3028 continue
8bdd16b4 3029 # video
3030 video_id = renderer.get('videoId')
3031 if video_id:
3032 yield self._extract_video(renderer)
201c1459 3033 continue
8bdd16b4 3034 # channel
3035 channel_id = renderer.get('channelId')
3036 if channel_id:
3037 title = try_get(
3038 renderer, lambda x: x['title']['simpleText'], compat_str)
3039 yield self.url_result(
3040 'https://www.youtube.com/channel/%s' % channel_id,
3041 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3042 continue
3043 # generic endpoint URL support
3044 ep_url = urljoin('https://www.youtube.com/', try_get(
3045 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3046 compat_str))
3047 if ep_url:
3048 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3049 if ie.suitable(ep_url):
3050 yield self.url_result(
3051 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3052 break
8bdd16b4 3053
3d3dddc9 3054 def _shelf_entries_from_content(self, shelf_renderer):
3055 content = shelf_renderer.get('content')
3056 if not isinstance(content, dict):
8bdd16b4 3057 return
cd7c66cf 3058 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3059 if renderer:
3060 # TODO: add support for nested playlists so each shelf is processed
3061 # as separate playlist
3062 # TODO: this includes only first N items
3063 for entry in self._grid_entries(renderer):
3064 yield entry
3065 renderer = content.get('horizontalListRenderer')
3066 if renderer:
3067 # TODO
3068 pass
8bdd16b4 3069
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf: its own URL first, then its content.

    When the shelf has an endpoint URL, a URL result titled with the
    shelf's first title run is yielded. If *skip_channels* is true and
    that URL contains ``/channels?``, nothing at all is yielded. Entries
    extracted from the shelf content always follow.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 3087
8bdd16b4 3088 def _playlist_entries(self, video_list_renderer):
3089 for content in video_list_renderer['contents']:
3090 if not isinstance(content, dict):
3091 continue
3092 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3093 if not isinstance(renderer, dict):
3094 continue
3095 video_id = renderer.get('videoId')
3096 if not video_id:
3097 continue
3098 yield self._extract_video(renderer)
07aeced6 3099
def _rich_entries(self, rich_grid_renderer):
    """Yield the video at ``content.videoRenderer`` if it has a video ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3107
8bdd16b4 3108 def _video_entry(self, video_renderer):
3109 video_id = video_renderer.get('videoId')
3110 if video_id:
3111 return self._extract_video(video_renderer)
dacb3a86 3112
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every YouTube video linked from the post text, except a link to
    the video that is already attached. Nothing is yielded when the thread
    has no ``backstagePostRenderer``.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # Video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # Playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # Inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        linked_video_id = YoutubeIE._match_id(link_url)
        # A link to the attached video would only duplicate its entry
        if linked_video_id != attached_video_id:
            yield self.url_result(
                link_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 3148
8bdd16b4 3149 def _post_thread_continuation_entries(self, post_thread_continuation):
3150 contents = post_thread_continuation.get('contents')
3151 if not isinstance(contents, list):
3152 return
3153 for content in contents:
3154 renderer = content.get('backstagePostThreadRenderer')
3155 if not isinstance(renderer, dict):
3156 continue
3157 for entry in self._post_thread_entries(renderer):
3158 yield entry
07aeced6 3159
39ed931e 3160 r''' # unused
3161 def _rich_grid_entries(self, contents):
3162 for content in contents:
3163 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3164 if video_renderer:
3165 entry = self._video_entry(video_renderer)
3166 if entry:
3167 yield entry
3168 '''
3169
29f7c58a 3170 @staticmethod
3171 def _build_continuation_query(continuation, ctp=None):
3172 query = {
3173 'ctoken': continuation,
3174 'continuation': continuation,
3175 }
3176 if ctp:
3177 query['itct'] = ctp
3178 return query
3179
@staticmethod
def _extract_next_continuation_data(renderer):
    """Build a continuation query from ``continuations[0].nextContinuationData``.

    Returns None when that structure is absent or has no ``continuation``
    token.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = next_data.get('continuation')
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 3191
@classmethod
def _extract_continuation(cls, renderer):
    """Find the continuation query for ``renderer``, or return None.

    ``nextContinuationData`` is preferred. Otherwise the renderer's
    ``contents`` and ``items`` lists are scanned, in that order, for the
    first ``continuationItemRenderer`` whose endpoint carries a
    ``continuationCommand`` token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
448830ce 3214
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of ``tab``, following continuations page by page.

    The entries embedded in the tab content are yielded first. While a
    continuation query is available, the next page is requested through
    ``_extract_response`` and dispatched on the first recognised key of
    either ``continuationContents`` or the ``appendContinuationItemsAction``
    items. Iteration stops when there is no continuation, no response, or
    no recognised renderer in the response.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first recognised renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list used as a writable cell shared with extract_entries
    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # 'itct' is only set by _build_continuation_query when the
        # continuation carried clickTrackingParams, so it may be absent.
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response does not supply one
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset the shared cell; extract_entries may fill it while iterating
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Maps the first continuation item's renderer key to
        # (handler, key under which the handler expects the item list)
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3334
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab whose ``selected`` flag is True.

    Both ``tabRenderer`` and ``expandableTabRenderer`` wrappers are
    considered. Raises ExtractorError when no tab is selected.
    """
    for candidate in tabs:
        tab_renderer = dict_get(candidate, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3343
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar.

    Returns a dict that may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``; keys whose value would be None are left out. When
    several sidebar items describe an owner, the last one wins.
    """
    details = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        channel_path = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        details['uploader'] = owner.get('text')
        details['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        details['uploader_url'] = urljoin('https://www.youtube.com/', channel_path)
    return dict((key, value) for key, value in details.items() if value is not None)
8bdd16b4 3366
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a page rendered as a set of tabs.

    Metadata is taken from ``channelMetadataRenderer`` when present,
    otherwise from ``playlistMetadataRenderer``. The playlist id falls back
    to ``item_id`` and the title to the hashtag header text or the playlist
    id. Uploader details come from the sidebar when no channel id is known.
    The entries are produced lazily by ``_entries`` for the selected tab.
    """
    channel_name = channel_url = channel_id = None
    playlist_id = title = description = None
    raw_thumbnails = tags = []

    selected_tab = self._extract_selected_tab(tabs)
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name, channel_url, channel_id = (
            renderer.get('title'), renderer.get('channelUrl'), renderer.get('externalId'))
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for raw_thumbnail in raw_thumbnails:
        if not isinstance(raw_thumbnail, dict):
            continue
        thumbnail_url = url_or_none(raw_thumbnail.get('url'))
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(raw_thumbnail.get('width')),
                'height': int_or_none(raw_thumbnail.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    # Append the selected tab's name(s) to the title
    title = (
        title
        + format_field(selected_tab, 'title', ' - %s')
        + format_field(selected_tab, 'expandedText', ' - %s'))

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # channel* fields mirror whatever uploader* ended up as
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(data)
    ytcfg = self._extract_ytcfg(item_id, webpage)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3439
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages on demand.

    Each page is a playlist renderer. Videos up to and including the last
    one already yielded are skipped, and iteration ends when a page is
    missing or empty, brings nothing new, or the very first video shows up
    again. The next page is requested from the ``next`` endpoint using the
    watch endpoint of the page's last item, with fallbacks for each field.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        if not playlist:
            # The previous response did not contain a playlist renderer
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item may not be a playlistPanelVideoRenderer; fall back
        # to an empty dict so the defaults below are used
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 3478
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a page whose data describes a watch-page playlist.

    When the playlist advertises a web URL different from ``url``, the work
    is delegated to the tab extractor for that URL. Otherwise the playlist
    is treated as a mix and extracted directly.
    """
    playlist_title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    advertised_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, advertised_path)
    if not playlist_url or playlist_url == url:
        mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
        return self.playlist_result(
            mix_entries, playlist_id=playlist_id, playlist_title=playlist_title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=playlist_title)
c5e8d7af 3496
@staticmethod
def _extract_alerts(data):
    """Yield ``(alert_type, message)`` for every alert found in ``data``.

    The message is the alert's ``simpleText`` followed by the text of all
    its ``runs``. Each alert is yielded at most once; alerts without a type
    or without any message text, and alert values that are not dicts, are
    skipped. Runs lacking a string ``text`` contribute nothing.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
            for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                # try_get returns None for a run without string text
                message += try_get(run, lambda x: x['text'], compat_str) or ''
            if message:
                yield alert_type, message
3513
3514 def _report_alerts(self, alerts, expected=True):
3ffc7c89 3515 errors = []
3516 warnings = []
95c01b6c 3517 for alert_type, alert_message in alerts:
f3eaa8dd 3518 if alert_type.lower() == 'error':
3ffc7c89 3519 errors.append([alert_type, alert_message])
f3eaa8dd 3520 else:
3ffc7c89 3521 warnings.append([alert_type, alert_message])
f3eaa8dd 3522
3ffc7c89 3523 for alert_type, alert_message in (warnings + errors[:-1]):
6a39ee13 3524 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
3ffc7c89 3525 if errors:
3526 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3527
95c01b6c 3528 def _extract_and_report_alerts(self, data, *args, **kwargs):
3529 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
3530
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when ``data`` has no playlist sidebar. Otherwise the browse
    id and params of the button are used when found, with fixed fallbacks
    when not, and the (non-fatal) API response is returned.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return

    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        primary_info = sidebar_item.get('playlistSidebarPrimaryInfoRenderer')
        menu_items = try_get(
            primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if not isinstance(menu_item, dict):
                continue
            nav_item = menu_item.get('menuNavigationItemRenderer')
            label = try_get(
                nav_item, lambda x: x['text']['simpleText'], compat_str)
            if not label or label.lower() != 'show unavailable videos':
                continue
            endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id, params = endpoint.get('browseId'), endpoint.get('params')
            # Stop scanning this menu; later sidebar items are still examined
            break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    account_syncid = self._extract_account_syncid(ytcfg)
    identity_token = self._extract_identity_token(webpage, item_id=item_id)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=account_syncid,
        identity_token=identity_token,
        visitor_data=visitor_data)
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3574
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint ``ep`` with retries and return the response.

    The call is retried, up to the ``extractor_retries`` param (default 3),
    on HTTP 500/503/404 errors and when the response contains none of
    ``check_get_keys``. Alerts in a response are reported through
    ``_extract_and_report_alerts``. On failure the error is raised when
    ``fatal`` is true; otherwise a warning is reported and None is returned.
    """
    response = None
    last_error = None
    # Starts at -1 so that the first attempt is number 0
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if count < retries:
                    continue
            # Not retriable, or out of retries
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            # Accept the response if no keys are required or one of them is present
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
3628
cd7c66cf 3629 def _extract_webpage(self, url, item_id):
a06916d9 3630 retries = self.get_param('extractor_retries', 3)
62bff2c1 3631 count = -1
c705177d 3632 last_error = 'Incomplete yt initial data recieved'
14fdfea9 3633 while count < retries:
62bff2c1 3634 count += 1
14fdfea9 3635 # Sometimes youtube returns a webpage with incomplete ytInitialData
62bff2c1 3636 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3637 if count:
c705177d 3638 self.report_warning('%s. Retrying ...' % last_error)
5ef7d9bd 3639 webpage = self._download_webpage(
3640 url, item_id,
cd7c66cf 3641 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
14fdfea9 3642 data = self._extract_yt_initial_data(item_id, webpage)
14fdfea9 3643 if data.get('contents') or data.get('currentVideoEndpoint'):
3644 break
95c01b6c 3645 # Extract alerts here only when there is error
3646 self._extract_and_report_alerts(data)
c705177d 3647 if count >= retries:
6a39ee13 3648 raise ExtractorError(last_error)
cd7c66cf 3649 return webpage, data
3650
9297939e 3651 @staticmethod
3652 def _smuggle_data(entries, data):
3653 for entry in entries:
3654 if data:
3655 entry['url'] = smuggle_url(entry['url'], data)
3656 yield entry
3657
def _real_extract(self, url):
    """Extract ``url``, carrying smuggled data over to the resulting entries.

    The music-URL flag is recorded in the smuggled data before extraction,
    and the same data is smuggled into every entry URL of the result.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
3666
# Splits a URL into the part matched by _VALID_URL ('pre'), an optional
# '/tab' path component (only attempted when the 'channel_type' group took
# part in the match) and whatever text remains ('post').
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
3668
def __real_extract(self, url, smuggled_data):
    """Extract a channel tab, playlist or watch-page playlist from ``url``.

    The URL is normalised first: the host is forced to www.youtube.com,
    music channel ids are mapped to their playlist or channel URL, channel
    home URLs are redirected to ``/videos`` and a watch URL with only a list
    id becomes a playlist URL. Both redirects can be disabled through the
    ``compat_opts`` param. The downloaded page is then handled as tabs, as a
    watch-page playlist, or as a single video, in that order.

    Raises ExtractorError when the page cannot be recognised or when a
    ``/live`` tab did not redirect to a video.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Groups that did not take part in the match become '' instead of None
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            # Compare case-insensitively, as _report_alerts does
                            if alert_type.lower() == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3784
c5e8d7af 3785
8bdd16b4 3786class YoutubePlaylistIE(InfoExtractor):
3787 IE_DESC = 'YouTube.com playlists'
3788 _VALID_URL = r'''(?x)(?:
3789 (?:https?://)?
3790 (?:\w+\.)?
3791 (?:
3792 (?:
3793 youtube(?:kids)?\.com|
29f7c58a 3794 invidio\.us
8bdd16b4 3795 )
3796 /.*?\?.*?\blist=
3797 )?
3798 (?P<id>%(playlist_id)s)
3799 )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
3800 IE_NAME = 'youtube:playlist'
cdc628a4 3801 _TESTS = [{
8bdd16b4 3802 'note': 'issue #673',
3803 'url': 'PLBB231211A4F62143',
cdc628a4 3804 'info_dict': {
8bdd16b4 3805 'title': '[OLD]Team Fortress 2 (Class-based LP)',
3806 'id': 'PLBB231211A4F62143',
3807 'uploader': 'Wickydoo',
3808 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
3809 },
3810 'playlist_mincount': 29,
3811 }, {
3812 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
3813 'info_dict': {
3814 'title': 'YDL_safe_search',
3815 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
3816 },
3817 'playlist_count': 2,
3818 'skip': 'This playlist is private',
9558dcec 3819 }, {
8bdd16b4 3820 'note': 'embedded',
3821 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
3822 'playlist_count': 4,
9558dcec 3823 'info_dict': {
8bdd16b4 3824 'title': 'JODA15',
3825 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
3826 'uploader': 'milan',
3827 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
9558dcec 3828 }
cdc628a4 3829 }, {
8bdd16b4 3830 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
3831 'playlist_mincount': 982,
3832 'info_dict': {
3833 'title': '2018 Chinese New Singles (11/6 updated)',
3834 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
3835 'uploader': 'LBK',
3836 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
3837 }
daa0df9e 3838 }, {
29f7c58a 3839 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
3840 'only_matching': True,
3841 }, {
3842 # music album playlist
3843 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
3844 'only_matching': True,
3845 }]
3846
3847 @classmethod
3848 def suitable(cls, url):
201c1459 3849 if YoutubeTabIE.suitable(url):
3850 return False
1bdae7d3 3851 # Hack for lazy extractors until more generic solution is implemented
3852 # (see #28780)
3853 from .youtube import parse_qs
201c1459 3854 qs = parse_qs(url)
3855 if qs.get('v', [None])[0]:
3856 return False
3857 return super(YoutubePlaylistIE, cls).suitable(url)
29f7c58a 3858
3859 def _real_extract(self, url):
3860 playlist_id = self._match_id(url)
46953e7e 3861 is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
9297939e 3862 url = update_url_query(
3863 'https://www.youtube.com/playlist',
3864 parse_qs(url) or {'list': playlist_id})
3865 if is_music_url:
3866 url = smuggle_url(url, {'is_music_url': True})
3867 return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 3868
3869
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list=`` parameter."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link as a watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3908
3909
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to a youtube.com/user/ URL."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the user page URL and delegate extraction to YoutubeTabIE."""
        username = self._match_id(url)
        user_page = 'https://www.youtube.com/user/%s' % username
        return self.url_result(
            user_page, ie=YoutubeTabIE.ie_key(), video_id=username)
9558dcec 3923
b05654f0 3924
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` family of keywords to the ``LL`` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    # Accepts :ytfav, :ytfavs, :ytfavorite(s), :ytfavourite(s), ...
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=LL`` playlist URL."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
3942
3943
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Extractor for the ``ytsearch`` keyword: pages through search results."""

    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    IE_NAME = 'youtube:search'
    # There doesn't appear to be a real limit; for example, a search for
    # 'python' returns more than 8.000.000 results.
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    # Optional value sent as the "params" field of the search request.
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* extracted videos for *query*, following continuations.

        Iteration stops when *n* videos have been yielded, when a response is
        empty or has no recognisable result list, or when a page carries no
        continuation token.
        """
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        for page_num in itertools.count(1):
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands'))
            if not response:
                return
            # The first getter matches the initial page layout, the second the
            # layout used by continuation responses.
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    # Skip anything that is not a video entry with an id.
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            request['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 4013
c9ae7b95 4014
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE bound to the ``ytsearchdate`` keyword."""

    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Percent-encoded form of 'CAI='; presumably YouTube's "sort by upload
    # date" filter token -- TODO confirm.
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4020
c9ae7b95 4021
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Handle youtube.com/results?search_query=... (or ?q=...) URLs."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS is not overridden here, so the value from YoutubeSearchIE applies.
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return the explicit URL pattern of this class unchanged."""
        # Presumably replaces a keyword-based pattern built by
        # SearchInfoExtractor -- confirm against that base class.
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the search terms and ``sp`` filter from the URL and run the search."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # Stored on the instance because _entries reads self._SEARCH_PARAMS;
        # an absent "sp" yields '' which _entries treats as "no params".
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4047
4048
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors

    Subclasses must define the _FEED_NAME property; it is used both for the
    extractor name and for the youtube.com/feed/<name> URL that is extracted.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass' feed name."""
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Ignore the matched input and delegate the feed URL to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4065
4066
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` keyword to the ``WL`` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=WL`` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4079
4080
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``; also matches the bare youtube.com home URL."""

    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True.
    _LOGIN_REQUIRED = False
    # Either the site root (optionally followed by "?" or "#") or the
    # :ytrec / :ytrecommended keyword.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4096
1ed5b5c9 4097
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions``, reached via the ``:ytsubs`` keywords."""

    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _FEED_NAME = 'subscriptions'
    # Accepts :ytsub, :ytsubs, :ytsubscription and :ytsubscriptions.
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4109
1ed5b5c9 4110
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history``, reached via ``:ythis`` / ``:ythistory``."""

    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4119
4120
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their video id and explain the likely cause.

    The pattern matches youtube.com / youtube-nocookie.com ``watch?`` URLs
    that end after at most one known non-id parameter, as well as
    ``attribution_link?a=...`` URLs with nothing after the first parameter.
    Extraction always fails with an explanatory, expected error.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: the URL has no video id, most likely due to shell quoting.

        Raises:
            ExtractorError: always, flagged ``expected=True``.
        """
        # The suggested command names this program (yt-dlp); the message
        # previously told users to run youtube-dl.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4168
4169
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` id is only 1-10 characters long."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id and URL."""
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)