]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[funimation] Add `FunimationShowIE` (#442)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Parse the query component of ``url`` with ``compat_urlparse.parse_qs``."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
de7f3446 60class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
61 """Provide base functions for Youtube extractors"""
62 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
9303ce3e 63 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
e00eb564
S
64
65 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
3995d37d
S
66 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
67 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
e00eb564 68
3462ffa8 69 _RESERVED_NAMES = (
bea74222 70 r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
46953e7e 71 r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
cd7c66cf 72 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 73
b2e8bc1b
JMF
74 _NETRC_MACHINE = 'youtube'
75 # If True it will raise an error if no login info is provided
76 _LOGIN_REQUIRED = False
77
70d5c17b 78 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 79
b2e8bc1b 80 def _login(self):
83317f69 81 """
82 Attempt to log in to YouTube.
83 True is returned if successful or skipped.
84 False is returned if login failed.
85
86 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
87 """
9d5d4d64 88
89 def warn(message):
90 self.report_warning(message)
91
92 # username+password login is broken
93 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
94 self.raise_login_required(
95 'Login details are needed to download this content', method='cookies')
68217024 96 username, password = self._get_login_info()
9d5d4d64 97 if username:
98 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
99 return
100 # Everything below this is broken!
101
b2e8bc1b
JMF
102 # No authentication to be performed
103 if username is None:
a06916d9 104 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 105 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 106 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 107 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 108 return True
b2e8bc1b 109
7cc3570e
PH
110 login_page = self._download_webpage(
111 self._LOGIN_URL, None,
69ea8ca4
PH
112 note='Downloading login page',
113 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
114 if login_page is False:
115 return
b2e8bc1b 116
1212e997 117 login_form = self._hidden_inputs(login_page)
c5e8d7af 118
e00eb564
S
119 def req(url, f_req, note, errnote):
120 data = login_form.copy()
121 data.update({
122 'pstMsg': 1,
123 'checkConnection': 'youtube',
124 'checkedDomains': 'youtube',
125 'hl': 'en',
126 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 127 'f.req': json.dumps(f_req),
e00eb564
S
128 'flowName': 'GlifWebSignIn',
129 'flowEntry': 'ServiceLogin',
baf67a60
S
130 # TODO: reverse actual botguard identifier generation algo
131 'bgRequest': '["identifier",""]',
041bc3ad 132 })
e00eb564
S
133 return self._download_json(
134 url, None, note=note, errnote=errnote,
135 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
136 fatal=False,
137 data=urlencode_postdata(data), headers={
138 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
139 'Google-Accounts-XSRF': 1,
140 })
141
3995d37d
S
142 lookup_req = [
143 username,
144 None, [], None, 'US', None, None, 2, False, True,
145 [
146 None, None,
147 [2, 1, None, 1,
148 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
149 None, [], 4],
150 1, [None, None, []], None, None, None, True
151 ],
152 username,
153 ]
154
e00eb564 155 lookup_results = req(
3995d37d 156 self._LOOKUP_URL, lookup_req,
e00eb564
S
157 'Looking up account info', 'Unable to look up account info')
158
159 if lookup_results is False:
160 return False
041bc3ad 161
3995d37d
S
162 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
163 if not user_hash:
164 warn('Unable to extract user hash')
165 return False
166
167 challenge_req = [
168 user_hash,
169 None, 1, None, [1, None, None, None, [password, None, True]],
170 [
171 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
172 1, [None, None, []], None, None, None, True
173 ]]
83317f69 174
3995d37d
S
175 challenge_results = req(
176 self._CHALLENGE_URL, challenge_req,
177 'Logging in', 'Unable to log in')
83317f69 178
3995d37d 179 if challenge_results is False:
e00eb564 180 return
83317f69 181
3995d37d
S
182 login_res = try_get(challenge_results, lambda x: x[0][5], list)
183 if login_res:
184 login_msg = try_get(login_res, lambda x: x[5], compat_str)
185 warn(
186 'Unable to login: %s' % 'Invalid password'
187 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
188 return False
189
190 res = try_get(challenge_results, lambda x: x[0][-1], list)
191 if not res:
192 warn('Unable to extract result entry')
193 return False
194
9a6628aa
S
195 login_challenge = try_get(res, lambda x: x[0][0], list)
196 if login_challenge:
197 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
198 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
199 # SEND_SUCCESS - TFA code has been successfully sent to phone
200 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 201 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
202 if status == 'QUOTA_EXCEEDED':
203 warn('Exceeded the limit of TFA codes, try later')
204 return False
205
206 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
207 if not tl:
208 warn('Unable to extract TL')
209 return False
210
211 tfa_code = self._get_tfa_info('2-step verification code')
212
213 if not tfa_code:
214 warn(
215 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
216 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
217 return False
218
219 tfa_code = remove_start(tfa_code, 'G-')
220
221 tfa_req = [
222 user_hash, None, 2, None,
223 [
224 9, None, None, None, None, None, None, None,
225 [None, tfa_code, True, 2]
226 ]]
227
228 tfa_results = req(
229 self._TFA_URL.format(tl), tfa_req,
230 'Submitting TFA code', 'Unable to submit TFA code')
231
232 if tfa_results is False:
233 return False
234
235 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
236 if tfa_res:
237 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
238 warn(
239 'Unable to finish TFA: %s' % 'Invalid TFA code'
240 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
241 return False
242
243 check_cookie_url = try_get(
244 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
245 else:
246 CHALLENGES = {
247 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
248 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
249 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
250 }
251 challenge = CHALLENGES.get(
252 challenge_str,
253 '%s returned error %s.' % (self.IE_NAME, challenge_str))
254 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
255 return False
3995d37d
S
256 else:
257 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
258
259 if not check_cookie_url:
260 warn('Unable to extract CheckCookie URL')
261 return False
e00eb564
S
262
263 check_cookie_results = self._download_webpage(
3995d37d
S
264 check_cookie_url, None, 'Checking cookie', fatal=False)
265
266 if check_cookie_results is False:
267 return False
e00eb564 268
3995d37d
S
269 if 'https://myaccount.google.com/' not in check_cookie_results:
270 warn('Unable to log in')
b2e8bc1b 271 return False
e00eb564 272
b2e8bc1b
JMF
273 return True
274
cce889b9 275 def _initialize_consent(self):
276 cookies = self._get_cookies('https://www.youtube.com/')
277 if cookies.get('__Secure-3PSID'):
278 return
279 consent_id = None
280 consent = cookies.get('CONSENT')
281 if consent:
282 if 'YES' in consent.value:
283 return
284 consent_id = self._search_regex(
285 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
286 if not consent_id:
287 consent_id = random.randint(100, 999)
288 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 289
b2e8bc1b 290 def _real_initialize(self):
cce889b9 291 self._initialize_consent()
b2e8bc1b
JMF
292 if self._downloader is None:
293 return
b2e8bc1b
JMF
294 if not self._login():
295 return
c5e8d7af 296
f4f751af 297 _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
298 _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
a0566bbf 299 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 300 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
301 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 302
def _generate_sapisidhash_header(self):
    """
    Build the value of a SAPISIDHASH Authorization header.

    Returns None when neither a __Secure-3PAPISID nor a SAPISID cookie is
    available for https://www.youtube.com.
    """
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    cookies = self._get_cookies('https://www.youtube.com')
    sapisid = dict_get(cookies, ('__Secure-3PAPISID', 'SAPISID'))
    if sapisid is None:
        return None
    timestamp = round(time.time())
    # SAPISID cookie is required if not already present
    if not cookies.get('SAPISID'):
        self._set_cookie(
            '.youtube.com', 'SAPISID', sapisid.value,
            secure=True, expire_time=timestamp + 3600)
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{timestamp} {sapisid.value} https://www.youtube.com'
    digest = hashlib.sha1(payload.encode('utf-8')).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'
a5c56234
M
320
321 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 322 note='Downloading API JSON', errnote='Unable to download API page',
323 context=None, api_key=None):
324
325 data = {'context': context} if context else {'context': self._extract_context()}
8bdd16b4 326 data.update(query)
f4f751af 327 real_headers = self._generate_api_headers()
328 real_headers.update({'content-type': 'application/json'})
329 if headers:
330 real_headers.update(headers)
545cc85d 331 return self._download_json(
a5c56234
M
332 'https://www.youtube.com/youtubei/v1/%s' % ep,
333 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 334 data=json.dumps(data).encode('utf8'), headers=real_headers,
335 query={'key': api_key or self._extract_api_key()})
336
def _extract_api_key(self, ytcfg=None):
    """Return the INNERTUBE_API_KEY string from ``ytcfg``, or the built-in default key."""
    configured_key = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_API_KEY'], compat_str)
    if configured_key:
        return configured_key
    return self._YT_INNERTUBE_API_KEY
c54f4aad 339
def _extract_yt_initial_data(self, video_id, webpage):
    """Find the ytInitialData object in ``webpage`` and return it parsed as JSON."""
    # Try the pattern anchored by a trailing boundary first, then the bare one
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE,
    )
    raw_json = self._search_regex(patterns, webpage, 'yt initial data')
    return self._parse_json(raw_json, video_id)
0c148415 346
a1c5d2ca
M
def _extract_identity_token(self, webpage, item_id):
    """
    Return the ID_TOKEN for ``webpage``.

    The ytcfg object is consulted first; if it yields no token, the raw
    page is searched with a regex. None is returned when nothing matches.
    """
    config = self._extract_ytcfg(item_id, webpage)
    id_token = try_get(config, lambda cfg: cfg['ID_TOKEN'], compat_str) if config else None
    if id_token:
        return id_token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
356
@staticmethod
def _extract_account_syncid(data):
    """
    Extract syncId required to download private playlists of secondary channels
    @param data Either response or ytcfg
    @returns    The channel sync id, the DELEGATED_SESSION_ID entry of
                ``data``, or None if neither is available (including when
                ``data`` is not a dict)
    """
    sync_ids = (try_get(
        data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
               lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
    if len(sync_ids) >= 2 and sync_ids[1]:
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return sync_ids[0]
    # ytcfg includes channel_syncid if on secondary channel.
    # Guarded so that a missing or non-dict ``data`` yields None instead of
    # raising AttributeError, matching the tolerant lookup above.
    if not isinstance(data, dict):
        return None
    return data.get('DELEGATED_SESSION_ID')
a1c5d2ca 372
def _extract_ytcfg(self, video_id, webpage):
    """Return the object passed to ``ytcfg.set(...)`` in ``webpage``, or {} if absent or unparsable."""
    if not webpage:
        return {}
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_config, video_id, fatal=False)
    return parsed or {}
380
def __extract_client_version(self, ytcfg):
    """Return the INNERTUBE_CLIENT_VERSION string from ``ytcfg``, or the default web client version."""
    configured_version = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'], compat_str)
    if configured_version:
        return configured_version
    return self._YT_WEB_CLIENT_VERSION
383
def _extract_context(self, ytcfg=None):
    """
    Return the innertube client context.

    The INNERTUBE_CONTEXT dict of ``ytcfg`` is used when present;
    otherwise a minimal context is rebuilt from the client name, client
    version and (if available) visitor data.
    """
    ready_context = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_CONTEXT'], dict)
    if ready_context:
        return ready_context

    # Recreate the client context (required)
    version = self.__extract_client_version(ytcfg)
    name = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
    client = {
        'clientName': name,
        'clientVersion': version,
    }
    visitor = try_get(ytcfg, lambda cfg: cfg['VISITOR_DATA'], compat_str)
    if visitor:
        client['visitorData'] = visitor
    return {'client': client}
402
def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
    """
    Build the HTTP headers for an API request.

    The client name/version headers are always present. Identity token,
    account sync id and visitor data headers are added only for truthy
    arguments, and the Authorization/X-Origin pair only when a
    SAPISIDHASH value can be generated.
    """
    api_headers = {
        'X-YouTube-Client-Name': '1',
        'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
    }
    if identity_token:
        api_headers['x-youtube-identity-token'] = identity_token
    if account_syncid:
        api_headers.update({
            'X-Goog-PageId': account_syncid,
            'X-Goog-AuthUser': 0,
        })
    if visitor_data:
        api_headers['x-goog-visitor-id'] = visitor_data
    authorization = self._generate_sapisidhash_header()
    if authorization is not None:
        api_headers.update({
            'Authorization': authorization,
            'X-Origin': 'https://www.youtube.com',
        })
    return api_headers
29f7c58a 420
@staticmethod
def is_music_url(url):
    """Return True if ``url`` begins with an http(s) music.youtube.com address."""
    matched = re.match(r'https?://music\.youtube\.com/', url)
    return bool(matched)
424
def _extract_video(self, renderer):
    """Build a 'url'-type result dict, handled by YoutubeIE, from a video renderer dict."""

    def text_at(*getters):
        # First string found in ``renderer`` by any of the getters, else None
        return try_get(renderer, getters, compat_str)

    video_id = renderer.get('videoId')
    title = text_at(
        lambda x: x['title']['runs'][0]['text'],
        lambda x: x['title']['simpleText'])
    description = text_at(lambda x: x['descriptionSnippet']['runs'][0]['text'])
    duration = parse_duration(text_at(lambda x: x['lengthText']['simpleText']))

    # Remove all whitespace, then read the leading run of digits and commas
    compact_views = re.sub(
        r'\s', '', text_at(lambda x: x['viewCountText']['simpleText']) or '')
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', compact_views, 'view count', default=None))

    uploader = text_at(
        lambda x: x['ownerText']['runs'][0]['text'],
        lambda x: x['shortBylineText']['runs'][0]['text'])

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
456
0c148415 457
360e1ca5 458class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 459 IE_DESC = 'YouTube.com'
bc2ca1bb 460 _INVIDIOUS_SITES = (
461 # invidious-redirect websites
462 r'(?:www\.)?redirect\.invidious\.io',
463 r'(?:(?:www|dev)\.)?invidio\.us',
464 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
465 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 466 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 467 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 468 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 469 # youtube-dl invidious instances list
470 r'(?:(?:www|no)\.)?invidiou\.sh',
471 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
472 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 473 r'(?:www\.)?invidious\.mastodon\.host',
474 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 475 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 476 r'(?:www\.)?invidious\.tinfoil-hat\.net',
477 r'(?:www\.)?invidious\.himiko\.cloud',
478 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 479 r'(?:www\.)?invidious\.tube',
480 r'(?:www\.)?invidiou\.site',
481 r'(?:www\.)?invidious\.site',
482 r'(?:www\.)?invidious\.xyz',
483 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 484 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 485 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 486 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 487 r'(?:www\.)?tube\.poal\.co',
488 r'(?:www\.)?tube\.connect\.cafe',
489 r'(?:www\.)?vid\.wxzm\.sx',
490 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 491 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 492 r'(?:www\.)?yewtu\.be',
493 r'(?:www\.)?yt\.elukerio\.org',
494 r'(?:www\.)?yt\.lelux\.fi',
495 r'(?:www\.)?invidious\.ggc-project\.de',
496 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 497 r'(?:www\.)?ytprivate\.com',
498 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 499 r'(?:www\.)?invidious\.toot\.koeln',
500 r'(?:www\.)?invidious\.fdn\.fr',
501 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 502 r'(?:www\.)?invidious\.namazso\.eu',
503 r'(?:www\.)?invidious\.silkky\.cloud',
504 r'(?:www\.)?invidious\.exonip\.de',
505 r'(?:www\.)?invidious\.riverside\.rocks',
506 r'(?:www\.)?invidious\.blamefran\.net',
507 r'(?:www\.)?invidious\.moomoo\.de',
508 r'(?:www\.)?ytb\.trom\.tf',
509 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 510 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
511 r'(?:www\.)?qklhadlycap4cnod\.onion',
512 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
513 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
514 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
515 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
516 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
517 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 518 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
519 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
520 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
521 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 522 )
cb7dfeea 523 _VALID_URL = r"""(?x)^
c5e8d7af 524 (
edb53e2d 525 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 526 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
527 (?:www\.)?deturl\.com/www\.youtube\.com|
528 (?:www\.)?pwnyoutube\.com|
529 (?:www\.)?hooktube\.com|
530 (?:www\.)?yourepeat\.com|
531 tube\.majestyc\.net|
532 %(invidious)s|
533 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
534 (?:.*?\#/)? # handle anchor (#/) redirect urls
535 (?: # the various things that can precede the ID:
ac7553d0 536 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 537 |(?: # or the v= param in all its forms
f7000f3a 538 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 539 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 540 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
541 v=
542 )
f4b05232 543 ))
cbaed4bb
S
544 |(?:
545 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
546 vid\.plus| # or vid.plus/xxxx
547 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 548 %(invidious)s
cbaed4bb 549 )/
edb53e2d 550 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 551 )
c5e8d7af 552 )? # all until now is optional -> you can pass the naked ID
201c1459 553 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 554 (?(1).+)? # if we found the ID, everything can follow
9297939e 555 (?:\#|$)""" % {
bc2ca1bb 556 'invidious': '|'.join(_INVIDIOUS_SITES),
557 }
e40c758c 558 _PLAYER_INFO_RE = (
cc2db878 559 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
560 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 561 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 562 )
2c62dc26 563 _formats = {
c2d3cb4c 564 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
565 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
566 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
567 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
568 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
569 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
570 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
571 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 572 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 573 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
574 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
575 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
576 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
577 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
578 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 579 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 580 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
581 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 582
583
584 # 3D videos
c2d3cb4c 585 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
586 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
587 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
588 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 589 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
590 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
591 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 592
96fb5605 593 # Apple HTTP Live Streaming
11f12195 594 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 595 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
596 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
597 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
598 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
599 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 600 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
601 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
602
603 # DASH mp4 video
d23028a8
S
604 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
605 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
606 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
607 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
608 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 609 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
610 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
611 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
612 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
613 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
614 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
615 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 616
f6f1fc92 617 # Dash mp4 audio
d23028a8
S
618 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
619 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
620 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
621 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
622 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
623 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
624 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
625
626 # Dash webm
d23028a8
S
627 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
628 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
629 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
630 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
631 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
632 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
633 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
634 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
635 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
636 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
637 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
638 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
639 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
640 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
641 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 642 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
643 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
644 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
645 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
646 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
647 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
648 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
649
650 # Dash webm audio
d23028a8
S
651 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
652 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 653
0857baad 654 # Dash webm audio with opus inside
d23028a8
S
655 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
656 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
657 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 658
ce6b9a2d
PH
659 # RTMP (unnamed)
660 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
661
662 # av01 video only formats sometimes served with "unknown" codecs
663 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
664 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
665 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
666 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 667 }
29f7c58a 668 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 669
fd5c4aab
S
670 _GEO_BYPASS = False
671
78caa52a 672 IE_NAME = 'youtube'
2eb88d95
PH
673 _TESTS = [
674 {
2d3d2997 675 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
676 'info_dict': {
677 'id': 'BaW_jenozKc',
678 'ext': 'mp4',
3867038a 679 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
680 'uploader': 'Philipp Hagemeister',
681 'uploader_id': 'phihag',
ec85ded8 682 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
683 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
684 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 685 'upload_date': '20121002',
3867038a 686 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 687 'categories': ['Science & Technology'],
3867038a 688 'tags': ['youtube-dl'],
556dbe7f 689 'duration': 10,
dbdaaa23 690 'view_count': int,
3e7c1224
PH
691 'like_count': int,
692 'dislike_count': int,
7c80519c 693 'start_time': 1,
297a564b 694 'end_time': 9,
2eb88d95 695 }
0e853ca4 696 },
fccd3771 697 {
4bc3a23e
PH
698 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
699 'note': 'Embed-only video (#1746)',
700 'info_dict': {
701 'id': 'yZIXLfi8CZQ',
702 'ext': 'mp4',
703 'upload_date': '20120608',
704 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
705 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
706 'uploader': 'SET India',
94bfcd23 707 'uploader_id': 'setindia',
ec85ded8 708 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 709 'age_limit': 18,
545cc85d 710 },
711 'skip': 'Private video',
fccd3771 712 },
11b56058 713 {
8bdd16b4 714 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
715 'note': 'Use the first video ID in the URL',
716 'info_dict': {
717 'id': 'BaW_jenozKc',
718 'ext': 'mp4',
3867038a 719 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
720 'uploader': 'Philipp Hagemeister',
721 'uploader_id': 'phihag',
ec85ded8 722 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 723 'upload_date': '20121002',
3867038a 724 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 725 'categories': ['Science & Technology'],
3867038a 726 'tags': ['youtube-dl'],
556dbe7f 727 'duration': 10,
dbdaaa23 728 'view_count': int,
11b56058
PM
729 'like_count': int,
730 'dislike_count': int,
34a7de29
S
731 },
732 'params': {
733 'skip_download': True,
734 },
11b56058 735 },
dd27fd17 736 {
2d3d2997 737 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
738 'note': '256k DASH audio (format 141) via DASH manifest',
739 'info_dict': {
740 'id': 'a9LDPn-MO4I',
741 'ext': 'm4a',
742 'upload_date': '20121002',
743 'uploader_id': '8KVIDEO',
ec85ded8 744 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
745 'description': '',
746 'uploader': '8KVIDEO',
747 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 748 },
4bc3a23e
PH
749 'params': {
750 'youtube_include_dash_manifest': True,
751 'format': '141',
4919603f 752 },
de3c7fe0 753 'skip': 'format 141 not served anymore',
dd27fd17 754 },
8bdd16b4 755 # DASH manifest with encrypted signature
756 {
757 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
758 'info_dict': {
759 'id': 'IB3lcPjvWLA',
760 'ext': 'm4a',
761 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
762 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
763 'duration': 244,
764 'uploader': 'AfrojackVEVO',
765 'uploader_id': 'AfrojackVEVO',
766 'upload_date': '20131011',
cc2db878 767 'abr': 129.495,
8bdd16b4 768 },
769 'params': {
770 'youtube_include_dash_manifest': True,
771 'format': '141/bestaudio[ext=m4a]',
772 },
773 },
aa79ac0c
PH
774 # Controversy video
775 {
776 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
777 'info_dict': {
778 'id': 'T4XJQO3qol8',
779 'ext': 'mp4',
556dbe7f 780 'duration': 219,
aa79ac0c 781 'upload_date': '20100909',
4fe54c12 782 'uploader': 'Amazing Atheist',
aa79ac0c 783 'uploader_id': 'TheAmazingAtheist',
ec85ded8 784 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 785 'title': 'Burning Everyone\'s Koran',
545cc85d 786 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 787 }
c522adb1 788 },
dd2d55f1 789 # Normal age-gate video (embed allowed)
c522adb1 790 {
2d3d2997 791 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
792 'info_dict': {
793 'id': 'HtVdAasjOgU',
794 'ext': 'mp4',
795 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 796 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 797 'duration': 142,
c522adb1
JMF
798 'uploader': 'The Witcher',
799 'uploader_id': 'WitcherGame',
ec85ded8 800 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 801 'upload_date': '20140605',
34952f09 802 'age_limit': 18,
c522adb1
JMF
803 },
804 },
8bdd16b4 805 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
806 # YouTube Red ad is not captured for creator
807 {
808 'url': '__2ABJjxzNo',
809 'info_dict': {
810 'id': '__2ABJjxzNo',
811 'ext': 'mp4',
812 'duration': 266,
813 'upload_date': '20100430',
814 'uploader_id': 'deadmau5',
815 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 816 'creator': 'deadmau5',
817 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 818 'uploader': 'deadmau5',
819 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 820 'alt_title': 'Some Chords',
8bdd16b4 821 },
822 'expected_warnings': [
823 'DASH manifest missing',
824 ]
825 },
067aa17e 826 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
827 {
828 'url': 'lqQg6PlCWgI',
829 'info_dict': {
830 'id': 'lqQg6PlCWgI',
831 'ext': 'mp4',
556dbe7f 832 'duration': 6085,
90227264 833 'upload_date': '20150827',
cbe2bd91 834 'uploader_id': 'olympic',
ec85ded8 835 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 836 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 837 'uploader': 'Olympic',
cbe2bd91
PH
838 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
839 },
840 'params': {
841 'skip_download': 'requires avconv',
e52a40ab 842 }
cbe2bd91 843 },
6271f1ca
PH
844 # Non-square pixels
845 {
846 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
847 'info_dict': {
848 'id': '_b-2C3KPAM0',
849 'ext': 'mp4',
850 'stretched_ratio': 16 / 9.,
556dbe7f 851 'duration': 85,
6271f1ca
PH
852 'upload_date': '20110310',
853 'uploader_id': 'AllenMeow',
ec85ded8 854 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 855 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 856 'uploader': '孫ᄋᄅ',
6271f1ca
PH
857 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
858 },
06b491eb
S
859 },
860 # url_encoded_fmt_stream_map is empty string
861 {
862 'url': 'qEJwOuvDf7I',
863 'info_dict': {
864 'id': 'qEJwOuvDf7I',
f57b7835 865 'ext': 'webm',
06b491eb
S
866 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
867 'description': '',
868 'upload_date': '20150404',
869 'uploader_id': 'spbelect',
870 'uploader': 'Наблюдатели Петербурга',
871 },
872 'params': {
873 'skip_download': 'requires avconv',
e323cf3f
S
874 },
875 'skip': 'This live event has ended.',
06b491eb 876 },
067aa17e 877 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
878 {
879 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
880 'info_dict': {
881 'id': 'FIl7x6_3R5Y',
eb6793ba 882 'ext': 'webm',
da77d856
S
883 'title': 'md5:7b81415841e02ecd4313668cde88737a',
884 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 885 'duration': 220,
da77d856
S
886 'upload_date': '20150625',
887 'uploader_id': 'dorappi2000',
ec85ded8 888 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 889 'uploader': 'dorappi2000',
eb6793ba 890 'formats': 'mincount:31',
da77d856 891 },
eb6793ba 892 'skip': 'not actual anymore',
2ee8f5d8 893 },
8a1a26ce
YCH
894 # DASH manifest with segment_list
895 {
896 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
897 'md5': '8ce563a1d667b599d21064e982ab9e31',
898 'info_dict': {
899 'id': 'CsmdDsKjzN8',
900 'ext': 'mp4',
17ee98e1 901 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
902 'uploader': 'Airtek',
903 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
904 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
905 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
906 },
907 'params': {
908 'youtube_include_dash_manifest': True,
909 'format': '135', # bestvideo
be49068d
S
910 },
911 'skip': 'This live event has ended.',
2ee8f5d8 912 },
cf7e015f
S
913 {
914 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 915 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 916 'info_dict': {
545cc85d 917 'id': 'jvGDaLqkpTg',
918 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
919 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
920 },
921 'playlist': [{
922 'info_dict': {
545cc85d 923 'id': 'jvGDaLqkpTg',
cf7e015f 924 'ext': 'mp4',
545cc85d 925 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
926 'description': 'md5:e03b909557865076822aa169218d6a5d',
927 'duration': 10643,
928 'upload_date': '20161111',
929 'uploader': 'Team PGP',
930 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
931 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
932 },
933 }, {
934 'info_dict': {
545cc85d 935 'id': '3AKt1R1aDnw',
cf7e015f 936 'ext': 'mp4',
545cc85d 937 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
938 'description': 'md5:e03b909557865076822aa169218d6a5d',
939 'duration': 10991,
940 'upload_date': '20161111',
941 'uploader': 'Team PGP',
942 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
943 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
944 },
945 }, {
946 'info_dict': {
545cc85d 947 'id': 'RtAMM00gpVc',
cf7e015f 948 'ext': 'mp4',
545cc85d 949 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
950 'description': 'md5:e03b909557865076822aa169218d6a5d',
951 'duration': 10995,
952 'upload_date': '20161111',
953 'uploader': 'Team PGP',
954 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
955 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
956 },
957 }, {
958 'info_dict': {
545cc85d 959 'id': '6N2fdlP3C5U',
cf7e015f 960 'ext': 'mp4',
545cc85d 961 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
962 'description': 'md5:e03b909557865076822aa169218d6a5d',
963 'duration': 10990,
964 'upload_date': '20161111',
965 'uploader': 'Team PGP',
966 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
967 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
968 },
969 }],
970 'params': {
971 'skip_download': True,
972 },
cbaed4bb 973 },
f9f49d87 974 {
067aa17e 975 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
976 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
977 'info_dict': {
978 'id': 'gVfLd0zydlo',
979 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
980 },
981 'playlist_count': 2,
be49068d 982 'skip': 'Not multifeed anymore',
f9f49d87 983 },
cbaed4bb 984 {
2d3d2997 985 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 986 'only_matching': True,
0e49d9a6 987 },
6d4fc66b 988 {
2d3d2997 989 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
990 'only_matching': True,
991 },
0e49d9a6 992 {
067aa17e 993 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 994 # Also tests cut-off URL expansion in video description (see
067aa17e
S
995 # https://github.com/ytdl-org/youtube-dl/issues/1892,
996 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
997 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
998 'info_dict': {
999 'id': 'lsguqyKfVQg',
1000 'ext': 'mp4',
1001 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 1002 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 1003 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1004 'duration': 133,
0e49d9a6
LL
1005 'upload_date': '20151119',
1006 'uploader_id': 'IronSoulElf',
ec85ded8 1007 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1008 'uploader': 'IronSoulElf',
eb6793ba
S
1009 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1010 'track': 'Dark Walk - Position Music',
1011 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 1012 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1013 },
1014 'params': {
1015 'skip_download': True,
1016 },
1017 },
61f92af1 1018 {
067aa17e 1019 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1020 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1021 'only_matching': True,
1022 },
313dfc45
LL
1023 {
1024 # Video with yt:stretch=17:0
1025 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1026 'info_dict': {
1027 'id': 'Q39EVAstoRM',
1028 'ext': 'mp4',
1029 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1030 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1031 'upload_date': '20151107',
1032 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1033 'uploader': 'CH GAMER DROID',
1034 },
1035 'params': {
1036 'skip_download': True,
1037 },
be49068d 1038 'skip': 'This video does not exist.',
313dfc45 1039 },
201c1459 1040 {
1041 # Video with incomplete 'yt:stretch=16:'
1042 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1043 'only_matching': True,
1044 },
7caf9830
S
1045 {
1046 # Video licensed under Creative Commons
1047 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1048 'info_dict': {
1049 'id': 'M4gD1WSo5mA',
1050 'ext': 'mp4',
1051 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1052 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1053 'duration': 721,
7caf9830
S
1054 'upload_date': '20150127',
1055 'uploader_id': 'BerkmanCenter',
ec85ded8 1056 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1057 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1058 'license': 'Creative Commons Attribution license (reuse allowed)',
1059 },
1060 'params': {
1061 'skip_download': True,
1062 },
1063 },
fd050249
S
1064 {
1065 # Channel-like uploader_url
1066 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1067 'info_dict': {
1068 'id': 'eQcmzGIKrzg',
1069 'ext': 'mp4',
1070 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1071 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1072 'duration': 4060,
fd050249 1073 'upload_date': '20151119',
eb6793ba 1074 'uploader': 'Bernie Sanders',
fd050249 1075 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1076 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1077 'license': 'Creative Commons Attribution license (reuse allowed)',
1078 },
1079 'params': {
1080 'skip_download': True,
1081 },
1082 },
040ac686
S
1083 {
1084 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1085 'only_matching': True,
7f29cf54
S
1086 },
1087 {
067aa17e 1088 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1089 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1090 'only_matching': True,
6496ccb4
S
1091 },
1092 {
1093 # Rental video preview
1094 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1095 'info_dict': {
1096 'id': 'uGpuVWrhIzE',
1097 'ext': 'mp4',
1098 'title': 'Piku - Trailer',
1099 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1100 'upload_date': '20150811',
1101 'uploader': 'FlixMatrix',
1102 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1104 'license': 'Standard YouTube License',
1105 },
1106 'params': {
1107 'skip_download': True,
1108 },
eb6793ba 1109 'skip': 'This video is not available.',
022a5d66 1110 },
12afdc2a
S
1111 {
1112 # YouTube Red video with episode data
1113 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1114 'info_dict': {
1115 'id': 'iqKdEhx-dD4',
1116 'ext': 'mp4',
1117 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1118 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1119 'duration': 2085,
12afdc2a
S
1120 'upload_date': '20170118',
1121 'uploader': 'Vsauce',
1122 'uploader_id': 'Vsauce',
1123 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1124 'series': 'Mind Field',
1125 'season_number': 1,
1126 'episode_number': 1,
1127 },
1128 'params': {
1129 'skip_download': True,
1130 },
1131 'expected_warnings': [
1132 'Skipping DASH manifest',
1133 ],
1134 },
c7121fa7
S
1135 {
1136 # The following content has been identified by the YouTube community
1137 # as inappropriate or offensive to some audiences.
1138 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1139 'info_dict': {
1140 'id': '6SJNVb0GnPI',
1141 'ext': 'mp4',
1142 'title': 'Race Differences in Intelligence',
1143 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1144 'duration': 965,
1145 'upload_date': '20140124',
1146 'uploader': 'New Century Foundation',
1147 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1148 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1149 },
1150 'params': {
1151 'skip_download': True,
1152 },
545cc85d 1153 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1154 },
022a5d66
S
1155 {
1156 # itag 212
1157 'url': '1t24XAntNCY',
1158 'only_matching': True,
fd5c4aab
S
1159 },
1160 {
1161 # geo restricted to JP
1162 'url': 'sJL6WA-aGkQ',
1163 'only_matching': True,
1164 },
cd5a74a2
S
1165 {
1166 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1167 'only_matching': True,
1168 },
bc2ca1bb 1169 {
1170 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1171 'only_matching': True,
1172 },
1173 {
1174 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1175 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1176 'only_matching': True,
1177 },
825cd268
RA
1178 {
1179 # DRM protected
1180 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1181 'only_matching': True,
4fe54c12
S
1182 },
1183 {
1184 # Video with unsupported adaptive stream type formats
1185 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1186 'info_dict': {
1187 'id': 'Z4Vy8R84T1U',
1188 'ext': 'mp4',
1189 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1190 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1191 'duration': 433,
1192 'upload_date': '20130923',
1193 'uploader': 'Amelia Putri Harwita',
1194 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1195 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1196 'formats': 'maxcount:10',
1197 },
1198 'params': {
1199 'skip_download': True,
1200 'youtube_include_dash_manifest': False,
1201 },
5429d6a9 1202 'skip': 'not actual anymore',
5caabd3c 1203 },
1204 {
822b9d9c 1205 # Youtube Music Auto-generated description
5caabd3c 1206 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1207 'info_dict': {
1208 'id': 'MgNrAu2pzNs',
1209 'ext': 'mp4',
1210 'title': 'Voyeur Girl',
1211 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1212 'upload_date': '20190312',
5429d6a9
S
1213 'uploader': 'Stephen - Topic',
1214 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1215 'artist': 'Stephen',
1216 'track': 'Voyeur Girl',
1217 'album': 'it\'s too much love to know my dear',
1218 'release_date': '20190313',
1219 'release_year': 2019,
1220 },
1221 'params': {
1222 'skip_download': True,
1223 },
1224 },
66b48727
RA
1225 {
1226 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1227 'only_matching': True,
1228 },
011e75e6
S
1229 {
1230 # invalid -> valid video id redirection
1231 'url': 'DJztXj2GPfl',
1232 'info_dict': {
1233 'id': 'DJztXj2GPfk',
1234 'ext': 'mp4',
1235 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1236 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1237 'upload_date': '20090125',
1238 'uploader': 'Prochorowka',
1239 'uploader_id': 'Prochorowka',
1240 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1241 'artist': 'Panjabi MC',
1242 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1243 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1244 },
1245 'params': {
1246 'skip_download': True,
1247 },
545cc85d 1248 'skip': 'Video unavailable',
ea74e00b
DP
1249 },
1250 {
1251 # empty description results in an empty string
1252 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1253 'info_dict': {
1254 'id': 'x41yOUIvK2k',
1255 'ext': 'mp4',
1256 'title': 'IMG 3456',
1257 'description': '',
1258 'upload_date': '20170613',
1259 'uploader_id': 'ElevageOrVert',
1260 'uploader': 'ElevageOrVert',
1261 },
1262 'params': {
1263 'skip_download': True,
1264 },
1265 },
a0566bbf 1266 {
29f7c58a 1267 # with '};' inside yt initial data (see [1])
1268 # see [2] for an example with '};' inside ytInitialPlayerResponse
1269 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1270 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1271 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1272 'info_dict': {
1273 'id': 'CHqg6qOn4no',
1274 'ext': 'mp4',
1275 'title': 'Part 77 Sort a list of simple types in c#',
1276 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1277 'upload_date': '20130831',
1278 'uploader_id': 'kudvenkat',
1279 'uploader': 'kudvenkat',
1280 },
1281 'params': {
1282 'skip_download': True,
1283 },
1284 },
29f7c58a 1285 {
1286 # another example of '};' in ytInitialData
1287 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1288 'only_matching': True,
1289 },
1290 {
1291 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1292 'only_matching': True,
1293 },
{
    # https://github.com/ytdl-org/youtube-dl/pull/28094
    'url': 'OtqTfy26tG0',
    'info_dict': {
        'id': 'OtqTfy26tG0',
        'ext': 'mp4',
        'title': 'Burn Out',
        'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
        'upload_date': '20141120',
        'uploader': 'The Cinematic Orchestra - Topic',
        'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
        'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
        'artist': 'The Cinematic Orchestra',
        'track': 'Burn Out',
        'album': 'Every Day',
        # Neither release field is expected to be set for this video
        'release_date': None,
        'release_year': None,
    },
    'params': {
        'skip_download': True,
    },
},
bc2ca1bb 1316 {
1317 # controversial video, only works with bpctr when authenticated with cookies
1318 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1319 'only_matching': True,
1320 },
f7ad7160 1321 {
1322 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1323 'url': 'cBvYw8_A0vQ',
1324 'info_dict': {
1325 'id': 'cBvYw8_A0vQ',
1326 'ext': 'mp4',
1327 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1328 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1329 'upload_date': '20201120',
1330 'uploader': 'Walk around Japan',
1331 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1332 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1333 },
1334 'params': {
1335 'skip_download': True,
1336 },
0fb983f6 1337 }, {
1338 # Has multiple audio streams
1339 'url': 'WaOKSUlf4TM',
1340 'only_matching': True
9297939e 1341 }, {
1342 # Requires Premium: has format 141 when requested using YTM url
1343 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1344 'only_matching': True
1345 }, {
120916da 1346 # multiple subtitles with same lang_code
1347 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1348 'only_matching': True,
1349 },
2eb88d95
PH
1350 ]
1351
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; any other URL is decided by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1361
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the two per-instance caches"""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # (player URL, signature shape) -> signature function; see _decrypt_signature
    self._player_cache = dict()
    # player id -> downloaded player code; see _extract_signature_function
    self._code_cache = dict()
e0df6211 1366
60064c53
PH
1367 def _signature_cache_id(self, example_sig):
1368 """ Return a string representation of a signature """
78caa52a 1369 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 1370
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id found in *player_url*.

    The patterns in ``cls._PLAYER_INFO_RE`` are tried in order and the
    ``id`` group of the first one that matches is returned.
    Raises ExtractorError when none of them matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        found = re.search(pattern, player_url)
        if found:
            return found.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c
S
1380
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms signatures shaped like *example_sig*.

    The function is taken from the downloader cache when an entry exists
    for this player and signature shape; otherwise the player code is
    downloaded (once per player id), parsed, and the result is cached.
    """
    player_id = self._extract_player_info(player_url)
    sig_shape = self._signature_cache_id(example_sig)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, sig_shape)
    # The id is used as a cache entry name, so it must not contain a path
    assert os.path.basename(func_id) == func_id

    cached_indices = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_indices is not None:
        # A cached spec lists, for each output position, the input index to copy
        return lambda s: ''.join(s[i] for i in cached_indices)

    if player_id not in self._code_cache:
        player_code = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
        self._code_cache[player_id] = player_code
    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Feed in a string whose k-th character has code point k, so that the
    # code points of the output are the index list to store in the cache
    probe = ''.join(compat_chr(n) for n in range(len(example_sig)))
    index_spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
1407
60064c53 1408 def _print_sig_code(self, func, example_sig):
edf3e38e
PH
1409 def gen_sig_code(idxs):
1410 def _genslice(start, end, step):
78caa52a 1411 starts = '' if start == 0 else str(start)
8bcc8756 1412 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 1413 steps = '' if step == 1 else (':%d' % step)
78caa52a 1414 return 's[%s%s%s]' % (starts, ends, steps)
edf3e38e
PH
1415
1416 step = None
7af808a5
PH
1417 # Quelch pyflakes warnings - start will be set when step is set
1418 start = '(Never used)'
edf3e38e
PH
1419 for i, prev in zip(idxs[1:], idxs[:-1]):
1420 if step is not None:
1421 if i - prev == step:
1422 continue
1423 yield _genslice(start, prev, step)
1424 step = None
1425 continue
1426 if i - prev in [-1, 1]:
1427 step = i - prev
1428 start = prev
1429 continue
1430 else:
78caa52a 1431 yield 's[%d]' % prev
edf3e38e 1432 if step is None:
78caa52a 1433 yield 's[%d]' % i
edf3e38e
PH
1434 else:
1435 yield _genslice(start, i, step)
1436
78caa52a 1437 test_string = ''.join(map(compat_chr, range(len(example_sig))))
c705320f 1438 cache_res = func(test_string)
edf3e38e 1439 cache_spec = [ord(c) for c in cache_res]
78caa52a 1440 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53
PH
1441 signature_id_tuple = '(%s)' % (
1442 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
69ea8ca4 1443 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 1444 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 1445 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1446
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Find the signature function in the player code and wrap it for Python.

    The function name is searched for with the patterns below, in order;
    the returned callable takes the signature string and passes it to the
    interpreted JS function as its single argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # Same text as the pattern just above; kept so the list is unchanged
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        # The interpreted function receives its arguments as a list
        return initial_function([s])

    return run_signature_function
1470
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    Signature functions are kept in self._player_cache, keyed by player
    URL and signature shape.  Any failure while obtaining or running the
    function is re-raised as ExtractorError carrying the traceback text.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Make protocol-relative and other non-http(s) player URLs absolute
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif re.match(r'https?://', player_url) is None:
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1497
def _mark_watched(self, video_id, player_response):
    """Request the playback tracking URL so the video is marked as watched.

    Returns without doing anything when *player_response* holds no valid
    playbackTracking / videostatsPlaybackUrl / baseUrl.  The request is
    made with fatal=False.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # Pick each of the 16 characters uniformly.  (randint(0, 256) & 63 was
    # biased: randint includes 256, which wraps around to index 0.)
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    # doseq=True: parse_qs gives a list of values for every key
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1522
66c9fa36
S
1523 @staticmethod
1524 def _extract_urls(webpage):
1525 # Embedded YouTube player
1526 entries = [
1527 unescapeHTML(mobj.group('url'))
1528 for mobj in re.finditer(r'''(?x)
1529 (?:
1530 <iframe[^>]+?src=|
1531 data-video-url=|
1532 <embed[^>]+?src=|
1533 embedSWF\(?:\s*|
1534 <object[^>]+data=|
1535 new\s+SWFObject\(
1536 )
1537 (["\'])
1538 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
f2332f18 1539 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
66c9fa36
S
1540 \1''', webpage)]
1541
1542 # lazyYT YouTube embed
1543 entries.extend(list(map(
1544 unescapeHTML,
1545 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1546
1547 # Wordpress "YouTube Video Importer" plugin
1548 matches = re.findall(r'''(?x)<div[^>]+
1549 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1550 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1551 entries.extend(m[-1] for m in matches)
1552
1553 return entries
1554
@staticmethod
def _extract_url(webpage):
    """Return the first entry of YoutubeIE._extract_urls(webpage), or None if it is empty."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1559
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second capture group of cls._VALID_URL matched against `url`.

    The pattern is applied in verbose mode from the start of the URL.
    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1567
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build the chapter list from the chaptered player bar in `data`.

    Each chapter ends where the next one starts; the last one ends at
    `duration`. Chapters whose start or end time cannot be determined are
    skipped. Returns None when `data` carries no chapter list, otherwise a
    list of dicts with 'start_time', 'end_time' and 'title'.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_of(item):
        # timeRangeStartMillis is in milliseconds; convert to seconds
        millis = try_get(
            item, lambda x: x['chapterRenderer']['timeRangeStartMillis'], int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for idx, item in enumerate(raw_chapters):
        begin = start_of(item)
        if begin is None:
            continue
        if idx + 1 < total:
            finish = start_of(raw_chapters[idx + 1])
        else:
            finish = duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                item, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1607
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search `webpage` for the JSON matched by `regex` and parse it non-fatally.

    The pattern followed by self._YT_INITIAL_BOUNDARY_RE is tried first, then
    the bare pattern; '{}' is parsed when neither matches.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex,
    )
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1612
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the result of datetime_from_str('now-<X><units>', precision='auto'),
    or None when time_text is missing/empty or has fewer than three
    space-separated words.
    """
    # A comment without published-time text reaches here as None
    if not time_text:
        return None
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    return datetime_from_str('now-%s%s' % (words[0], words[1]), precision='auto')
1622
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the non-empty 'text' strings of the dicts in `runs`.

    Entries that are not dicts or carry no string text are ignored.
    Returns None when there is nothing to join.
    """
    pieces = []
    for run in runs:
        if isinstance(run, dict):
            piece = try_get(run, lambda x: x['text'], compat_str)
            if piece:
                pieces.append(piece)
    return ''.join(pieces) if pieces else None
1636
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'. 'parent' in the result
    is the given parent id, or 'root' for top-level comments. 'timestamp' is
    None when the published-time text is missing or cannot be parsed.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)
    # time_text is None when there are no published-time runs, and
    # parse_time_text gives None for text it does not understand;
    # neither case may abort the extraction of the comment.
    timestamp = None
    if time_text:
        parsed_time = self.parse_time_text(time_text)
        if parsed_time is not None:
            timestamp = calendar.timegm(parsed_time.timetuple())
    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                  lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_liked,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
1669
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generate comments, and recursively their replies, by following continuations.

    Yields the comment dicts built by _extract_comment. On the top-level call
    (parent is None) the estimated total number of comments is additionally
    yielded once, as an int, if the first page reports it.

    session_token_list is a one-element list holding the session token; it is
    updated in place whenever a response carries a new 'xsrf_token', so the
    recursive reply generators share it. comment_counts is a shared list of
    [comments so far, estimated total, current reply thread number]; it is
    created here when not supplied.

    Raises ExtractorError when a page cannot be downloaded or stays
    incomplete after the configured number of retries.
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type and must be try_get's third argument.
            # It used to sit inside the getter tuple, where it was applied as a
            # second getter and returned a copy of the whole container.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self.get_param('extractor_retries', 3)
        count = -1
        last_error = None

        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                    # Hand the estimated total to the consumer as a bare int
                    yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    # Restart from the sorted continuation without consuming this page
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
1854
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
    """Collect all comments reachable from the renderers in `contents`.

    For every entry, the first 'itemSectionRenderer' is fed to
    _comment_entries. Ints coming out of that generator are taken as the
    estimated total; everything else is stored as a comment.
    Returns a dict with 'comments' and 'comment_count'.
    """
    supported_renderers = (
        'itemSectionRenderer',
    )
    collected = []
    expected_total = 0
    for entry in contents:
        for renderer_name, renderer in entry.items():
            if renderer_name in supported_renderers:
                for item in self._comment_entries(
                        renderer,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg),
                        ytcfg=ytcfg,
                        session_token_list=[xsrf_token]):
                    if isinstance(item, int):
                        expected_total = item
                    else:
                        collected.append(item)
                # Only the first supported renderer of an entry is processed
                break
    self.to_screen('Downloaded %d/%d comments' % (len(collected), expected_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
1885
4e6767b5 1886 @staticmethod
1887 def _get_video_info_params(video_id):
1888 return {
1889 'video_id': video_id,
1890 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1891 'html5': '1',
1892 'c': 'TVHTML5',
1893 'cver': '6.20180913',
1894 }
1895
c5e8d7af 1896 def _real_extract(self, url):
cf7e015f 1897 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1898 video_id = self._match_id(url)
9297939e 1899
1900 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
1901
545cc85d 1902 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1903 webpage_url = base_url + 'watch?v=' + video_id
1904 webpage = self._download_webpage(
cce889b9 1905 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1906
9297939e 1907 def get_text(x):
1908 if not x:
1909 return
1910 text = x.get('simpleText')
1911 if text and isinstance(text, compat_str):
1912 return text
1913 runs = x.get('runs')
1914 if not isinstance(runs, list):
1915 return
1916 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
1917
1918 ytm_streaming_data = {}
1919 if is_music_url:
1920 # we are forcing to use parse_json because 141 only appeared in get_video_info.
1921 # el, c, cver, cplayer field required for 141(aac 256kbps) codec
1922 # maybe paramter of youtube music player?
1923 ytm_player_response = self._parse_json(try_get(compat_parse_qs(
1924 self._download_webpage(
1925 base_url + 'get_video_info', video_id,
fe03a6cd 1926 'Fetching youtube music info webpage',
1927 'unable to download youtube music info webpage', query={
4e6767b5 1928 **self._get_video_info_params(video_id),
9297939e 1929 'el': 'detailpage',
1930 'c': 'WEB_REMIX',
1931 'cver': '0.1',
00ae2769 1932 'cplayer': 'UNIPLAYER',
ed807c18 1933 }, fatal=False) or ''),
9297939e 1934 lambda x: x['player_response'][0],
ed807c18 1935 compat_str) or '{}', video_id, fatal=False)
9297939e 1936 ytm_streaming_data = ytm_player_response.get('streamingData') or {}
1937
545cc85d 1938 player_response = None
1939 if webpage:
1940 player_response = self._extract_yt_initial_variable(
1941 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1942 video_id, 'initial player response')
f4f751af 1943
1944 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1945 if not player_response:
1946 player_response = self._call_api(
f4f751af 1947 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1948
1949 playability_status = player_response.get('playabilityStatus') or {}
1950 if playability_status.get('reason') == 'Sign in to confirm your age':
1951 pr = self._parse_json(try_get(compat_parse_qs(
1952 self._download_webpage(
1953 base_url + 'get_video_info', video_id,
4e6767b5 1954 'Refetching age-gated info webpage', 'unable to download video info webpage',
1955 query=self._get_video_info_params(video_id), fatal=False)),
545cc85d 1956 lambda x: x['player_response'][0],
1957 compat_str) or '{}', video_id)
1958 if pr:
1959 player_response = pr
1960
1961 trailer_video_id = try_get(
1962 playability_status,
1963 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1964 compat_str)
1965 if trailer_video_id:
1966 return self.url_result(
1967 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1968
545cc85d 1969 search_meta = (
1970 lambda x: self._html_search_meta(x, webpage, default=None)) \
1971 if webpage else lambda x: None
dbdaaa23 1972
545cc85d 1973 video_details = player_response.get('videoDetails') or {}
37357d21 1974 microformat = try_get(
545cc85d 1975 player_response,
1976 lambda x: x['microformat']['playerMicroformatRenderer'],
1977 dict) or {}
1978 video_title = video_details.get('title') \
1979 or get_text(microformat.get('title')) \
1980 or search_meta(['og:title', 'twitter:title', 'title'])
1981 video_description = video_details.get('shortDescription')
cf7e015f 1982
8fe10494 1983 if not smuggled_data.get('force_singlefeed', False):
a06916d9 1984 if not self.get_param('noplaylist'):
8fe10494
S
1985 multifeed_metadata_list = try_get(
1986 player_response,
1987 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1988 compat_str)
8fe10494
S
1989 if multifeed_metadata_list:
1990 entries = []
1991 feed_ids = []
1992 for feed in multifeed_metadata_list.split(','):
1993 # Unquote should take place before split on comma (,) since textual
1994 # fields may contain comma as well (see
067aa17e 1995 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1996 feed_data = compat_parse_qs(
1997 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1998
1999 def feed_entry(name):
545cc85d 2000 return try_get(
2001 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
2002
2003 feed_id = feed_entry('id')
2004 if not feed_id:
2005 continue
2006 feed_title = feed_entry('title')
2007 title = video_title
2008 if feed_title:
2009 title += ' (%s)' % feed_title
8fe10494
S
2010 entries.append({
2011 '_type': 'url_transparent',
2012 'ie_key': 'Youtube',
2013 'url': smuggle_url(
545cc85d 2014 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 2015 {'force_singlefeed': True}),
6b09401b 2016 'title': title,
8fe10494 2017 })
6b09401b 2018 feed_ids.append(feed_id)
8fe10494
S
2019 self.to_screen(
2020 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2021 % (', '.join(feed_ids), video_id))
545cc85d 2022 return self.playlist_result(
2023 entries, video_id, video_title, video_description)
8fe10494
S
2024 else:
2025 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2026
9297939e 2027 formats, itags, stream_ids = [], [], []
cc2db878 2028 itag_qualities = {}
545cc85d 2029 player_url = None
d3fc8074 2030 q = qualities([
2031 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2032 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2033 ])
9297939e 2034
545cc85d 2035 streaming_data = player_response.get('streamingData') or {}
2036 streaming_formats = streaming_data.get('formats') or []
2037 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2038 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2039 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2040
545cc85d 2041 for fmt in streaming_formats:
2042 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2043 continue
321bf820 2044
cc2db878 2045 itag = str_or_none(fmt.get('itag'))
9297939e 2046 audio_track = fmt.get('audioTrack') or {}
2047 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2048 if stream_id in stream_ids:
2049 continue
2050
cc2db878 2051 quality = fmt.get('quality')
d3fc8074 2052 if quality == 'tiny' or not quality:
2053 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2054 if itag and quality:
2055 itag_qualities[itag] = quality
2056 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2057 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2058 # number of fragment that would subsequently requested with (`&sq=N`)
2059 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2060 continue
2061
545cc85d 2062 fmt_url = fmt.get('url')
2063 if not fmt_url:
2064 sc = compat_parse_qs(fmt.get('signatureCipher'))
2065 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2066 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2067 if not (sc and fmt_url and encrypted_sig):
2068 continue
2069 if not player_url:
2070 if not webpage:
2071 continue
2072 player_url = self._search_regex(
2073 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
2074 webpage, 'player URL', fatal=False)
2075 if not player_url:
201e9eaa 2076 continue
545cc85d 2077 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2078 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2079 fmt_url += '&' + sp + '=' + signature
2080
545cc85d 2081 if itag:
2082 itags.append(itag)
9297939e 2083 stream_ids.append(stream_id)
2084
cc2db878 2085 tbr = float_or_none(
2086 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2087 dct = {
2088 'asr': int_or_none(fmt.get('audioSampleRate')),
2089 'filesize': int_or_none(fmt.get('contentLength')),
2090 'format_id': itag,
0fb983f6 2091 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2092 'fps': int_or_none(fmt.get('fps')),
2093 'height': int_or_none(fmt.get('height')),
dca3ff4a 2094 'quality': q(quality),
cc2db878 2095 'tbr': tbr,
545cc85d 2096 'url': fmt_url,
2097 'width': fmt.get('width'),
0fb983f6 2098 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2099 }
2100 mimetype = fmt.get('mimeType')
2101 if mimetype:
2102 mobj = re.match(
2103 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2104 if mobj:
2105 dct['ext'] = mimetype2ext(mobj.group(1))
2106 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2107 no_audio = dct.get('acodec') == 'none'
2108 no_video = dct.get('vcodec') == 'none'
2109 if no_audio:
2110 dct['vbr'] = tbr
2111 if no_video:
2112 dct['abr'] = tbr
2113 if no_audio or no_video:
545cc85d 2114 dct['downloader_options'] = {
2115 # Youtube throttles chunks >~10M
2116 'http_chunk_size': 10485760,
bf1317d2 2117 }
7c60c33e 2118 if dct.get('ext'):
2119 dct['container'] = dct['ext'] + '_dash'
545cc85d 2120 formats.append(dct)
2121
9297939e 2122 for sd in (streaming_data, ytm_streaming_data):
2123 hls_manifest_url = sd.get('hlsManifestUrl')
2124 if hls_manifest_url:
2125 for f in self._extract_m3u8_formats(
2126 hls_manifest_url, video_id, 'mp4', fatal=False):
2127 itag = self._search_regex(
2128 r'/itag/(\d+)', f['url'], 'itag', default=None)
2129 if itag:
2130 f['format_id'] = itag
8d68ab98 2131 formats.append(f)
545cc85d 2132
a06916d9 2133 if self.get_param('youtube_include_dash_manifest', True):
9297939e 2134 for sd in (streaming_data, ytm_streaming_data):
2135 dash_manifest_url = sd.get('dashManifestUrl')
2136 if dash_manifest_url:
2137 for f in self._extract_mpd_formats(
2138 dash_manifest_url, video_id, fatal=False):
2139 itag = f['format_id']
2140 if itag in itags:
2141 continue
2142 if itag in itag_qualities:
9297939e 2143 f['quality'] = q(itag_qualities[itag])
2144 filesize = int_or_none(self._search_regex(
2145 r'/clen/(\d+)', f.get('fragment_base_url')
2146 or f['url'], 'file size', default=None))
2147 if filesize:
2148 f['filesize'] = filesize
2149 formats.append(f)
bf1317d2 2150
545cc85d 2151 if not formats:
a06916d9 2152 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2153 self.raise_no_formats(
545cc85d 2154 'This video is DRM protected.', expected=True)
2155 pemr = try_get(
2156 playability_status,
2157 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2158 dict) or {}
2159 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2160 subreason = pemr.get('subreason')
2161 if subreason:
2162 subreason = clean_html(get_text(subreason))
2163 if subreason == 'The uploader has not made this video available in your country.':
2164 countries = microformat.get('availableCountries')
2165 if not countries:
2166 regions_allowed = search_meta('regionsAllowed')
2167 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2168 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2169 reason += '\n' + subreason
2170 if reason:
b7da73eb 2171 self.raise_no_formats(reason, expected=True)
bf1317d2 2172
545cc85d 2173 self._sort_formats(formats)
bf1317d2 2174
545cc85d 2175 keywords = video_details.get('keywords') or []
2176 if not keywords and webpage:
2177 keywords = [
2178 unescapeHTML(m.group('content'))
2179 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2180 for keyword in keywords:
2181 if keyword.startswith('yt:stretch='):
201c1459 2182 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2183 if mobj:
2184 # NB: float is intentional for forcing float division
2185 w, h = (float(v) for v in mobj.groups())
2186 if w > 0 and h > 0:
2187 ratio = w / h
2188 for f in formats:
2189 if f.get('vcodec') != 'none':
2190 f['stretched_ratio'] = ratio
2191 break
6449cd80 2192
545cc85d 2193 thumbnails = []
2194 for container in (video_details, microformat):
2195 for thumbnail in (try_get(
2196 container,
2197 lambda x: x['thumbnail']['thumbnails'], list) or []):
2198 thumbnail_url = thumbnail.get('url')
2199 if not thumbnail_url:
bf1317d2 2200 continue
1988fab7 2201 # Sometimes youtube gives a wrong thumbnail URL. See:
2202 # https://github.com/yt-dlp/yt-dlp/issues/233
2203 # https://github.com/ytdl-org/youtube-dl/issues/28023
2204 if 'maxresdefault' in thumbnail_url:
2205 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2206 thumbnails.append({
545cc85d 2207 'url': thumbnail_url,
ff2751ac 2208 'height': int_or_none(thumbnail.get('height')),
545cc85d 2209 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2210 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2211 })
ff2751ac 2212 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2213 if thumbnail_url:
2214 thumbnails.append({
2215 'url': thumbnail_url,
2216 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2217 })
2218 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2219 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2220 thumbnails.append({
2221 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2222 'preference': 1,
2223 })
2224 self._remove_duplicate_formats(thumbnails)
545cc85d 2225
2226 category = microformat.get('category') or search_meta('genre')
2227 channel_id = video_details.get('channelId') \
2228 or microformat.get('externalChannelId') \
2229 or search_meta('channelId')
2230 duration = int_or_none(
2231 video_details.get('lengthSeconds')
2232 or microformat.get('lengthSeconds')) \
2233 or parse_duration(search_meta('duration'))
2234 is_live = video_details.get('isLive')
2235 owner_profile_url = microformat.get('ownerProfileUrl')
2236
2237 info = {
2238 'id': video_id,
2239 'title': self._live_title(video_title) if is_live else video_title,
2240 'formats': formats,
2241 'thumbnails': thumbnails,
2242 'description': video_description,
2243 'upload_date': unified_strdate(
2244 microformat.get('uploadDate')
2245 or search_meta('uploadDate')),
2246 'uploader': video_details['author'],
2247 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2248 'uploader_url': owner_profile_url,
2249 'channel_id': channel_id,
2250 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2251 'duration': duration,
2252 'view_count': int_or_none(
2253 video_details.get('viewCount')
2254 or microformat.get('viewCount')
2255 or search_meta('interactionCount')),
2256 'average_rating': float_or_none(video_details.get('averageRating')),
2257 'age_limit': 18 if (
2258 microformat.get('isFamilySafe') is False
2259 or search_meta('isFamilyFriendly') == 'false'
2260 or search_meta('og:restrictions:age') == '18+') else 0,
2261 'webpage_url': webpage_url,
2262 'categories': [category] if category else None,
2263 'tags': keywords,
2264 'is_live': is_live,
2265 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2266 'was_live': video_details.get('isLiveContent'),
545cc85d 2267 }
b477fc13 2268
545cc85d 2269 pctr = try_get(
2270 player_response,
2271 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2272 subtitles = {}
2273 if pctr:
774d79cc 2274 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2275 lang_subs = container.setdefault(lang_code, [])
545cc85d 2276 for fmt in self._SUBTITLE_FORMATS:
2277 query.update({
2278 'fmt': fmt,
2279 })
2280 lang_subs.append({
2281 'ext': fmt,
2282 'url': update_url_query(base_url, query),
774d79cc 2283 'name': sub_name,
545cc85d 2284 })
7e72694b 2285
545cc85d 2286 for caption_track in (pctr.get('captionTracks') or []):
2287 base_url = caption_track.get('baseUrl')
2288 if not base_url:
2289 continue
2290 if caption_track.get('kind') != 'asr':
120916da 2291 lang_code = (
2292 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2293 or caption_track.get('languageCode'))
545cc85d 2294 if not lang_code:
2295 continue
2296 process_language(
774d79cc 2297 subtitles, base_url, lang_code,
2298 try_get(caption_track, lambda x: x.get('name').get('simpleText')),
2299 {})
545cc85d 2300 continue
2301 automatic_captions = {}
2302 for translation_language in (pctr.get('translationLanguages') or []):
2303 translation_language_code = translation_language.get('languageCode')
2304 if not translation_language_code:
2305 continue
2306 process_language(
2307 automatic_captions, base_url, translation_language_code,
774d79cc 2308 try_get(translation_language, lambda x: x['languageName']['simpleText']),
545cc85d 2309 {'tlang': translation_language_code})
2310 info['automatic_captions'] = automatic_captions
2311 info['subtitles'] = subtitles
7e72694b 2312
545cc85d 2313 parsed_url = compat_urllib_parse_urlparse(url)
2314 for component in [parsed_url.fragment, parsed_url.query]:
2315 query = compat_parse_qs(component)
2316 for k, v in query.items():
2317 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2318 d_k += '_time'
2319 if d_k not in info and k in s_ks:
2320 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2321
2322 # Youtube Music Auto-generated description
822b9d9c 2323 if video_description:
38d70284 2324 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2325 if mobj:
822b9d9c
RA
2326 release_year = mobj.group('release_year')
2327 release_date = mobj.group('release_date')
2328 if release_date:
2329 release_date = release_date.replace('-', '')
2330 if not release_year:
545cc85d 2331 release_year = release_date[:4]
2332 info.update({
2333 'album': mobj.group('album'.strip()),
2334 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2335 'track': mobj.group('track').strip(),
2336 'release_date': release_date,
cc2db878 2337 'release_year': int_or_none(release_year),
545cc85d 2338 })
7e72694b 2339
545cc85d 2340 initial_data = None
2341 if webpage:
2342 initial_data = self._extract_yt_initial_variable(
2343 webpage, self._YT_INITIAL_DATA_RE, video_id,
2344 'yt initial data')
2345 if not initial_data:
2346 initial_data = self._call_api(
f4f751af 2347 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2348
c60ee3a2 2349 try:
2350 # This will error if there is no livechat
2351 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2352 info['subtitles']['live_chat'] = [{
2353 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2354 'video_id': video_id,
2355 'ext': 'json',
2356 'protocol': 'youtube_live_chat' if is_live else 'youtube_live_chat_replay',
2357 }]
2358 except (KeyError, IndexError, TypeError):
2359 pass
545cc85d 2360
2361 if initial_data:
2362 chapters = self._extract_chapters_from_json(
2363 initial_data, video_id, duration)
2364 if not chapters:
2365 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2366 contents = try_get(
2367 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2368 list)
2369 if not contents:
2370 continue
2371
2372 def chapter_time(mmlir):
2373 return parse_duration(
2374 get_text(mmlir.get('timeDescription')))
2375
2376 chapters = []
2377 for next_num, content in enumerate(contents, start=1):
2378 mmlir = content.get('macroMarkersListItemRenderer') or {}
2379 start_time = chapter_time(mmlir)
2380 end_time = chapter_time(try_get(
2381 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2382 if next_num < len(contents) else duration
2383 if start_time is None or end_time is None:
2384 continue
2385 chapters.append({
2386 'start_time': start_time,
2387 'end_time': end_time,
2388 'title': get_text(mmlir.get('title')),
2389 })
2390 if chapters:
2391 break
2392 if chapters:
2393 info['chapters'] = chapters
2394
2395 contents = try_get(
2396 initial_data,
2397 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2398 list) or []
2399 for content in contents:
2400 vpir = content.get('videoPrimaryInfoRenderer')
2401 if vpir:
2402 stl = vpir.get('superTitleLink')
2403 if stl:
2404 stl = get_text(stl)
2405 if try_get(
2406 vpir,
2407 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2408 info['location'] = stl
2409 else:
2410 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2411 if mobj:
2412 info.update({
2413 'series': mobj.group(1),
2414 'season_number': int(mobj.group(2)),
2415 'episode_number': int(mobj.group(3)),
2416 })
2417 for tlb in (try_get(
2418 vpir,
2419 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2420 list) or []):
2421 tbr = tlb.get('toggleButtonRenderer') or {}
2422 for getter, regex in [(
2423 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2424 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2425 lambda x: x['accessibility'],
2426 lambda x: x['accessibilityData']['accessibilityData'],
2427 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2428 label = (try_get(tbr, getter, dict) or {}).get('label')
2429 if label:
2430 mobj = re.match(regex, label)
2431 if mobj:
2432 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2433 break
2434 sbr_tooltip = try_get(
2435 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2436 if sbr_tooltip:
2437 like_count, dislike_count = sbr_tooltip.split(' / ')
2438 info.update({
2439 'like_count': str_to_int(like_count),
2440 'dislike_count': str_to_int(dislike_count),
2441 })
2442 vsir = content.get('videoSecondaryInfoRenderer')
2443 if vsir:
2444 info['channel'] = get_text(try_get(
2445 vsir,
2446 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2447 dict))
545cc85d 2448 rows = try_get(
2449 vsir,
2450 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2451 list) or []
2452 multiple_songs = False
2453 for row in rows:
2454 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2455 multiple_songs = True
2456 break
2457 for row in rows:
2458 mrr = row.get('metadataRowRenderer') or {}
2459 mrr_title = mrr.get('title')
2460 if not mrr_title:
2461 continue
2462 mrr_title = get_text(mrr['title'])
2463 mrr_contents_text = get_text(mrr['contents'][0])
2464 if mrr_title == 'License':
2465 info['license'] = mrr_contents_text
2466 elif not multiple_songs:
2467 if mrr_title == 'Album':
2468 info['album'] = mrr_contents_text
2469 elif mrr_title == 'Artist':
2470 info['artist'] = mrr_contents_text
2471 elif mrr_title == 'Song':
2472 info['track'] = mrr_contents_text
2473
2474 fallbacks = {
2475 'channel': 'uploader',
2476 'channel_id': 'uploader_id',
2477 'channel_url': 'uploader_url',
2478 }
2479 for to, frm in fallbacks.items():
2480 if not info.get(to):
2481 info[to] = info.get(frm)
2482
2483 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2484 v = info.get(s_k)
2485 if v:
2486 info[d_k] = v
b84071c0 2487
c224251a
M
2488 is_private = bool_or_none(video_details.get('isPrivate'))
2489 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2490 is_membersonly = None
b28f8d24 2491 is_premium = None
c224251a
M
2492 if initial_data and is_private is not None:
2493 is_membersonly = False
b28f8d24 2494 is_premium = False
c224251a
M
2495 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2496 for content in contents or []:
2497 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2498 for badge in badges or []:
2499 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2500 if label.lower() == 'members only':
2501 is_membersonly = True
2502 break
b28f8d24
M
2503 elif label.lower() == 'premium':
2504 is_premium = True
2505 break
2506 if is_membersonly or is_premium:
c224251a
M
2507 break
2508
2509 # TODO: Add this for playlists
2510 info['availability'] = self._availability(
2511 is_private=is_private,
b28f8d24 2512 needs_premium=is_premium,
c224251a
M
2513 needs_subscription=is_membersonly,
2514 needs_auth=info['age_limit'] >= 18,
2515 is_unlisted=None if is_private is None else is_unlisted)
2516
06167fbb 2517 # get xsrf for annotations or comments
a06916d9 2518 get_annotations = self.get_param('writeannotations', False)
2519 get_comments = self.get_param('getcomments', False)
06167fbb 2520 if get_annotations or get_comments:
29f7c58a 2521 xsrf_token = None
545cc85d 2522 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2523 if ytcfg:
2524 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2525 if not xsrf_token:
2526 xsrf_token = self._search_regex(
2527 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2528 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2529
2530 # annotations
06167fbb 2531 if get_annotations:
64b6a4e9
RA
2532 invideo_url = try_get(
2533 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2534 if xsrf_token and invideo_url:
29f7c58a 2535 xsrf_field_name = None
2536 if ytcfg:
2537 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2538 if not xsrf_field_name:
2539 xsrf_field_name = self._search_regex(
2540 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2541 webpage, 'xsrf field name',
29f7c58a 2542 group='xsrf_field_name', default='session_token')
8a784c74 2543 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2544 self._proto_relative_url(invideo_url),
2545 video_id, note='Downloading annotations',
2546 errnote='Unable to download video annotations', fatal=False,
2547 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2548
277d6ff5 2549 if get_comments:
a1c5d2ca 2550 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2551
545cc85d 2552 self.mark_watched(video_id, player_response)
d77ab8e2 2553
545cc85d 2554 return info
c5e8d7af 2555
5f6a1245 2556
8bdd16b4 2557class YoutubeTabIE(YoutubeBaseInfoExtractor):
2558 IE_DESC = 'YouTube.com tab'
70d5c17b 2559 _VALID_URL = r'''(?x)
2560 https?://
2561 (?:\w+\.)?
2562 (?:
2563 youtube(?:kids)?\.com|
2564 invidio\.us
2565 )/
2566 (?:
fe03a6cd 2567 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 2568 (?P<not_channel>
9ba5705a 2569 feed/|hashtag/|
70d5c17b 2570 (?:playlist|watch)\?.*?\blist=
2571 )|
29f7c58a 2572 (?!(?:%s)\b) # Direct URLs
70d5c17b 2573 )
2574 (?P<id>[^/?\#&]+)
2575 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2576 IE_NAME = 'youtube:tab'
2577
81127aa5 2578 _TESTS = [{
da692b79 2579 'note': 'playlists, multipage',
8bdd16b4 2580 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2581 'playlist_mincount': 94,
2582 'info_dict': {
2583 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2584 'title': 'Игорь Клейнер - Playlists',
2585 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2586 'uploader': 'Игорь Клейнер',
2587 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2588 },
2589 }, {
da692b79 2590 'note': 'playlists, multipage, different order',
8bdd16b4 2591 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2592 'playlist_mincount': 94,
2593 'info_dict': {
2594 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2595 'title': 'Игорь Клейнер - Playlists',
2596 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2597 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2598 'uploader': 'Игорь Клейнер',
8bdd16b4 2599 },
201c1459 2600 }, {
da692b79 2601 'note': 'playlists, series',
201c1459 2602 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2603 'playlist_mincount': 5,
2604 'info_dict': {
2605 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2606 'title': '3Blue1Brown - Playlists',
2607 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 2608 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2609 'uploader': '3Blue1Brown',
201c1459 2610 },
8bdd16b4 2611 }, {
da692b79 2612 'note': 'playlists, singlepage',
8bdd16b4 2613 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2614 'playlist_mincount': 4,
2615 'info_dict': {
2616 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2617 'title': 'ThirstForScience - Playlists',
2618 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2619 'uploader': 'ThirstForScience',
2620 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2621 }
2622 }, {
2623 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2624 'only_matching': True,
2625 }, {
da692b79 2626 'note': 'basic, single video playlist',
0e30a7b9 2627 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2628 'info_dict': {
0e30a7b9 2629 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2630 'uploader': 'Sergey M.',
2631 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2632 'title': 'youtube-dl public playlist',
81127aa5 2633 },
0e30a7b9 2634 'playlist_count': 1,
9291475f 2635 }, {
da692b79 2636 'note': 'empty playlist',
0e30a7b9 2637 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2638 'info_dict': {
0e30a7b9 2639 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2640 'uploader': 'Sergey M.',
2641 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2642 'title': 'youtube-dl empty playlist',
9291475f
PH
2643 },
2644 'playlist_count': 0,
2645 }, {
da692b79 2646 'note': 'Home tab',
8bdd16b4 2647 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2648 'info_dict': {
8bdd16b4 2649 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2650 'title': 'lex will - Home',
2651 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2652 'uploader': 'lex will',
2653 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2654 },
8bdd16b4 2655 'playlist_mincount': 2,
9291475f 2656 }, {
da692b79 2657 'note': 'Videos tab',
8bdd16b4 2658 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2659 'info_dict': {
8bdd16b4 2660 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2661 'title': 'lex will - Videos',
2662 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2663 'uploader': 'lex will',
2664 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2665 },
8bdd16b4 2666 'playlist_mincount': 975,
9291475f 2667 }, {
da692b79 2668 'note': 'Videos tab, sorted by popular',
8bdd16b4 2669 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2670 'info_dict': {
8bdd16b4 2671 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2672 'title': 'lex will - Videos',
2673 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2674 'uploader': 'lex will',
2675 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2676 },
8bdd16b4 2677 'playlist_mincount': 199,
9291475f 2678 }, {
da692b79 2679 'note': 'Playlists tab',
8bdd16b4 2680 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2681 'info_dict': {
8bdd16b4 2682 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2683 'title': 'lex will - Playlists',
2684 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2685 'uploader': 'lex will',
2686 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2687 },
8bdd16b4 2688 'playlist_mincount': 17,
ac7553d0 2689 }, {
da692b79 2690 'note': 'Community tab',
8bdd16b4 2691 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2692 'info_dict': {
8bdd16b4 2693 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2694 'title': 'lex will - Community',
2695 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2696 'uploader': 'lex will',
2697 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2698 },
2699 'playlist_mincount': 18,
87dadd45 2700 }, {
da692b79 2701 'note': 'Channels tab',
8bdd16b4 2702 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2703 'info_dict': {
8bdd16b4 2704 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2705 'title': 'lex will - Channels',
2706 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2707 'uploader': 'lex will',
2708 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2709 },
deaec5af 2710 'playlist_mincount': 12,
cd684175 2711 }, {
2712 'note': 'Search tab',
2713 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
2714 'playlist_mincount': 40,
2715 'info_dict': {
2716 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2717 'title': '3Blue1Brown - Search - linear algebra',
2718 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2719 'uploader': '3Blue1Brown',
2720 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2721 },
6b08cdf6 2722 }, {
a0566bbf 2723 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2724 'only_matching': True,
2725 }, {
a0566bbf 2726 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2727 'only_matching': True,
2728 }, {
a0566bbf 2729 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2730 'only_matching': True,
2731 }, {
2732 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2733 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2734 'info_dict': {
2735 'title': '29C3: Not my department',
2736 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2737 'uploader': 'Christiaan008',
2738 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2739 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2740 },
2741 'playlist_count': 96,
2742 }, {
2743 'note': 'Large playlist',
2744 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2745 'info_dict': {
8bdd16b4 2746 'title': 'Uploads from Cauchemar',
2747 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2748 'uploader': 'Cauchemar',
2749 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2750 },
8bdd16b4 2751 'playlist_mincount': 1123,
2752 }, {
da692b79 2753 'note': 'even larger playlist, 8832 videos',
8bdd16b4 2754 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2755 'only_matching': True,
4b7df0d3
JMF
2756 }, {
2757 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2758 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2759 'info_dict': {
acf757f4
PH
2760 'title': 'Uploads from Interstellar Movie',
2761 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2762 'uploader': 'Interstellar Movie',
8bdd16b4 2763 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2764 },
481cc733 2765 'playlist_mincount': 21,
358de58c 2766 }, {
2767 'note': 'Playlist with "show unavailable videos" button',
2768 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2769 'info_dict': {
2770 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2771 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2772 'uploader': 'Phim Siêu Nhân Nhật Bản',
2773 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2774 },
da692b79 2775 'playlist_mincount': 200,
5d342002 2776 }, {
da692b79 2777 'note': 'Playlist with unavailable videos in page 7',
5d342002 2778 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2779 'info_dict': {
2780 'title': 'Uploads from BlankTV',
2781 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2782 'uploader': 'BlankTV',
2783 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2784 },
da692b79 2785 'playlist_mincount': 1000,
8bdd16b4 2786 }, {
da692b79 2787 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 2788 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2789 'info_dict': {
2790 'title': 'Data Analysis with Dr Mike Pound',
2791 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2792 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2793 'uploader': 'Computerphile',
deaec5af 2794 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2795 },
2796 'playlist_mincount': 11,
2797 }, {
a0566bbf 2798 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2799 'only_matching': True,
dacb3a86 2800 }, {
da692b79 2801 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
2802 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2803 'info_dict': {
2804 'id': 'FqZTN594JQw',
2805 'ext': 'webm',
2806 'title': "Smiley's People 01 detective, Adventure Series, Action",
2807 'uploader': 'STREEM',
2808 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2809 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2810 'upload_date': '20150526',
2811 'license': 'Standard YouTube License',
2812 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2813 'categories': ['People & Blogs'],
2814 'tags': list,
dbdaaa23 2815 'view_count': int,
dacb3a86
S
2816 'like_count': int,
2817 'dislike_count': int,
2818 },
2819 'params': {
2820 'skip_download': True,
2821 },
13a75688 2822 'skip': 'This video is not available.',
dacb3a86 2823 'add_ie': [YoutubeIE.ie_key()],
481cc733 2824 }, {
8bdd16b4 2825 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2826 'only_matching': True,
66b48727 2827 }, {
8bdd16b4 2828 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2829 'only_matching': True,
a0566bbf 2830 }, {
2831 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2832 'info_dict': {
da692b79 2833 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 2834 'ext': 'mp4',
deaec5af 2835 'title': compat_str,
a0566bbf 2836 'uploader': 'Sky News',
2837 'uploader_id': 'skynews',
2838 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 2839 'upload_date': r're:\d{8}',
2840 'description': compat_str,
a0566bbf 2841 'categories': ['News & Politics'],
2842 'tags': list,
2843 'like_count': int,
2844 'dislike_count': int,
2845 },
2846 'params': {
2847 'skip_download': True,
2848 },
da692b79 2849 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 2850 }, {
2851 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2852 'info_dict': {
2853 'id': 'a48o2S1cPoo',
2854 'ext': 'mp4',
2855 'title': 'The Young Turks - Live Main Show',
2856 'uploader': 'The Young Turks',
2857 'uploader_id': 'TheYoungTurks',
2858 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2859 'upload_date': '20150715',
2860 'license': 'Standard YouTube License',
2861 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2862 'categories': ['News & Politics'],
2863 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2864 'like_count': int,
2865 'dislike_count': int,
2866 },
2867 'params': {
2868 'skip_download': True,
2869 },
2870 'only_matching': True,
2871 }, {
2872 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2873 'only_matching': True,
2874 }, {
2875 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2876 'only_matching': True,
09f1580e 2877 }, {
2878 'note': 'A channel that is not live. Should raise error',
2879 'url': 'https://www.youtube.com/user/numberphile/live',
2880 'only_matching': True,
3d3dddc9 2881 }, {
2882 'url': 'https://www.youtube.com/feed/trending',
2883 'only_matching': True,
2884 }, {
3d3dddc9 2885 'url': 'https://www.youtube.com/feed/library',
2886 'only_matching': True,
2887 }, {
3d3dddc9 2888 'url': 'https://www.youtube.com/feed/history',
2889 'only_matching': True,
2890 }, {
3d3dddc9 2891 'url': 'https://www.youtube.com/feed/subscriptions',
2892 'only_matching': True,
2893 }, {
3d3dddc9 2894 'url': 'https://www.youtube.com/feed/watch_later',
2895 'only_matching': True,
2896 }, {
da692b79 2897 'note': 'Recommended - redirects to home page',
3d3dddc9 2898 'url': 'https://www.youtube.com/feed/recommended',
2899 'only_matching': True,
29f7c58a 2900 }, {
da692b79 2901 'note': 'inline playlist with not always working continuations',
29f7c58a 2902 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2903 'only_matching': True,
2904 }, {
2905 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2906 'only_matching': True,
2907 }, {
2908 'url': 'https://www.youtube.com/course',
2909 'only_matching': True,
2910 }, {
2911 'url': 'https://www.youtube.com/zsecurity',
2912 'only_matching': True,
2913 }, {
2914 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2915 'only_matching': True,
2916 }, {
2917 'url': 'https://www.youtube.com/TheYoungTurks/live',
2918 'only_matching': True,
39ed931e 2919 }, {
2920 'url': 'https://www.youtube.com/hashtag/cctv9',
2921 'info_dict': {
2922 'id': 'cctv9',
2923 'title': '#cctv9',
2924 },
2925 'playlist_mincount': 350,
201c1459 2926 }, {
2927 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2928 'only_matching': True,
9297939e 2929 }, {
da692b79 2930 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 2931 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2932 'only_matching': True
fe03a6cd 2933 }, {
2934 'note': '/browse/ should redirect to /channel/',
2935 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
2936 'only_matching': True
2937 }, {
2938 'note': 'VLPL, should redirect to playlist?list=PL...',
2939 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2940 'info_dict': {
2941 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2942 'uploader': 'NoCopyrightSounds',
2943 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
2944 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
2945 'title': 'NCS Releases',
2946 },
2947 'playlist_mincount': 166,
18db7548 2948 }, {
2949 'note': 'Topic, should redirect to playlist?list=UU...',
2950 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
2951 'info_dict': {
2952 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
2953 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
2954 'title': 'Uploads from Royalty Free Music - Topic',
2955 'uploader': 'Royalty Free Music - Topic',
2956 },
2957 'expected_warnings': [
2958 'A channel/user page was given',
2959 'The URL does not have a videos tab',
2960 ],
2961 'playlist_mincount': 101,
2962 }, {
2963 'note': 'Topic without a UU playlist',
2964 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
2965 'info_dict': {
2966 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
2967 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
2968 },
2969 'expected_warnings': [
2970 'A channel/user page was given',
2971 'The URL does not have a videos tab',
2972 'Falling back to channel URL',
2973 ],
2974 'playlist_mincount': 9,
abcdd12b 2975 }, {
2976 'note': 'Youtube music Album',
2977 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
2978 'info_dict': {
2979 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
2980 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
2981 },
2982 'playlist_count': 50,
29f7c58a 2983 }]
2984
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL accepted by YoutubeIE is rejected here; every other URL is
    judged by the inherited ``suitable()`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2989
2990 def _extract_channel_id(self, webpage):
2991 channel_id = self._html_search_meta(
2992 'channelId', webpage, 'channel id', default=None)
2993 if channel_id:
2994 return channel_id
2995 channel_url = self._html_search_meta(
2996 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2997 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2998 'twitter:app:url:googleplay'), webpage, 'channel url')
2999 return self._search_regex(
3000 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3001 channel_url, 'channel id')
15f6397c 3002
8bdd16b4 3003 @staticmethod
cd7c66cf 3004 def _extract_basic_item_renderer(item):
3005 # Modified from _extract_grid_item_renderer
201c1459 3006 known_basic_renderers = (
3007 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3008 )
3009 for key, renderer in item.items():
201c1459 3010 if not isinstance(renderer, dict):
cd7c66cf 3011 continue
201c1459 3012 elif key in known_basic_renderers:
3013 return renderer
3014 elif key.startswith('grid') and key.endswith('Renderer'):
3015 return renderer
8bdd16b4 3016
8bdd16b4 3017 def _grid_entries(self, grid_renderer):
3018 for item in grid_renderer['items']:
3019 if not isinstance(item, dict):
39b62db1 3020 continue
cd7c66cf 3021 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3022 if not isinstance(renderer, dict):
3023 continue
3024 title = try_get(
201c1459 3025 renderer, (lambda x: x['title']['runs'][0]['text'],
3026 lambda x: x['title']['simpleText']), compat_str)
8bdd16b4 3027 # playlist
3028 playlist_id = renderer.get('playlistId')
3029 if playlist_id:
3030 yield self.url_result(
3031 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3032 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3033 video_title=title)
201c1459 3034 continue
8bdd16b4 3035 # video
3036 video_id = renderer.get('videoId')
3037 if video_id:
3038 yield self._extract_video(renderer)
201c1459 3039 continue
8bdd16b4 3040 # channel
3041 channel_id = renderer.get('channelId')
3042 if channel_id:
3043 title = try_get(
3044 renderer, lambda x: x['title']['simpleText'], compat_str)
3045 yield self.url_result(
3046 'https://www.youtube.com/channel/%s' % channel_id,
3047 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3048 continue
3049 # generic endpoint URL support
3050 ep_url = urljoin('https://www.youtube.com/', try_get(
3051 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3052 compat_str))
3053 if ep_url:
3054 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3055 if ie.suitable(ep_url):
3056 yield self.url_result(
3057 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3058 break
8bdd16b4 3059
3d3dddc9 3060 def _shelf_entries_from_content(self, shelf_renderer):
3061 content = shelf_renderer.get('content')
3062 if not isinstance(content, dict):
8bdd16b4 3063 return
cd7c66cf 3064 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3065 if renderer:
3066 # TODO: add support for nested playlists so each shelf is processed
3067 # as separate playlist
3068 # TODO: this includes only first N items
3069 for entry in self._grid_entries(renderer):
3070 yield entry
3071 renderer = content.get('horizontalListRenderer')
3072 if renderer:
3073 # TODO
3074 pass
8bdd16b4 3075
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield results for a shelf renderer.

    When the shelf has an endpoint URL, a URL result for it is yielded
    first (titled with the first run of the shelf title).  The entries
    found in the shelf content are yielded afterwards in either case.

    With ``skip_channels`` set, a shelf whose URL contains ``/channels?``
    produces nothing at all.
    """
    endpoint = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint)
    # Skip links to other channels. Note that checking for
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return
    if shelf_url:
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 3093
8bdd16b4 3094 def _playlist_entries(self, video_list_renderer):
3095 for content in video_list_renderer['contents']:
3096 if not isinstance(content, dict):
3097 continue
3098 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3099 if not isinstance(renderer, dict):
3100 continue
3101 video_id = renderer.get('videoId')
3102 if not video_id:
3103 continue
3104 yield self._extract_video(renderer)
07aeced6 3105
def _rich_entries(self, rich_grid_renderer):
    """Yield the video entry nested under ``content.videoRenderer``.

    Yields nothing when that renderer is missing, is not a dict, or has no
    ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3113
8bdd16b4 3114 def _video_entry(self, video_renderer):
3115 video_id = video_renderer.get('videoId')
3116 if video_id:
3117 return self._extract_video(video_renderer)
dacb3a86 3118
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by one backstage (community) post.

    In order: the attached video (if any), the attached playlist (if any),
    then every link in the post text that YoutubeIE accepts, except a link
    pointing at the already attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # Do not emit the attached video a second time
        if video_id == linked_id:
            continue
        yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3154
8bdd16b4 3155 def _post_thread_continuation_entries(self, post_thread_continuation):
3156 contents = post_thread_continuation.get('contents')
3157 if not isinstance(contents, list):
3158 return
3159 for content in contents:
3160 renderer = content.get('backstagePostThreadRenderer')
3161 if not isinstance(renderer, dict):
3162 continue
3163 for entry in self._post_thread_entries(renderer):
3164 yield entry
07aeced6 3165
39ed931e 3166 r''' # unused
3167 def _rich_grid_entries(self, contents):
3168 for content in contents:
3169 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3170 if video_renderer:
3171 entry = self._video_entry(video_renderer)
3172 if entry:
3173 yield entry
3174 '''
3175
29f7c58a 3176 @staticmethod
3177 def _build_continuation_query(continuation, ctp=None):
3178 query = {
3179 'ctoken': continuation,
3180 'continuation': continuation,
3181 }
3182 if ctp:
3183 query['itct'] = ctp
3184 return query
3185
@staticmethod
def _extract_next_continuation_data(renderer):
    """Build a continuation query from ``continuations[0].nextContinuationData``.

    Returns None when that structure is absent or carries no
    ``continuation`` token.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = data.get('continuation')
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(token, data.get('clickTrackingParams'))
c5e8d7af 3197
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None if it has none.

    ``nextContinuationData`` is preferred. Otherwise the renderer's
    ``contents`` and then ``items`` lists are scanned for the first
    ``continuationItemRenderer`` whose endpoint carries a command token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 3220
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of *tab*, following continuation pages.

    The entries embedded in the tab's ``sectionListRenderer`` or
    ``richGridRenderer`` are yielded first. Further pages are then requested
    through ``_extract_response`` for as long as a continuation is found and
    the response contains a renderer this method knows how to handle.

    *item_id* is only used to label the page downloads. *identity_token*,
    *account_syncid* and *ytcfg* are passed on to the API header/request
    helpers.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list used as a mutable cell that extract_entries() writes
    # to, because Python 2 does not support nonlocal
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # _build_continuation_query() only sets 'itct' when click-tracking
        # params were present, so it must not be indexed unconditionally
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep using the latest visitor data the server handed out
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh cell: extract_entries() reads this name through its closure
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Maps the renderer found in the first continuation item to
        # (entry generator, key under which that generator expects the items)
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognizable in this response: stop paging
        break
9558dcec 3340
@staticmethod
def _extract_selected_tab(tabs):
    """Return the first tab renderer in *tabs* whose ``selected`` is True.

    Both ``tabRenderer`` and ``expandableTabRenderer`` are considered.
    Raises ExtractorError when no tab is selected.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3349
@staticmethod
def _extract_uploader(data):
    """Return the uploader fields found in the playlist sidebar of *data*.

    The result may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``; fields whose value is None are left out. An empty dict
    is returned when the sidebar has no owner information.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for item in sidebar_items:
        if not isinstance(item, dict):
            continue
        secondary_info = item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        info['uploader'] = owner.get('text')
        info['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        info['uploader_url'] = urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
    return dict((key, value) for key, value in info.items() if value is not None)
8bdd16b4 3372
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tabbed (channel/playlist/hashtag) page.

    Metadata is taken from ``channelMetadataRenderer`` when present and from
    ``playlistMetadataRenderer`` otherwise; the entries come from the
    selected tab via ``_entries``.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    # Both names start out bound to the same empty list; they are only ever
    # rebound below, never mutated in place
    thumbnails_list = tags = []

    selected_tab = self._extract_selected_tab(tabs)
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # None for playlist pages (channel_id is only set from channel
        # metadata); replaced by item_id further down
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only thumbnails that are dicts with a valid URL
    thumbnails = []
    for t in thumbnails_list:
        if not isinstance(t, dict):
            continue
        thumbnail_url = url_or_none(t.get('url'))
        if not thumbnail_url:
            continue
        thumbnails.append({
            'url': thumbnail_url,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        })
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        # Hashtag pages carry their title in the header
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    # Append the selected tab's name/expanded text to the title when present
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata: fall back to the owner shown in the sidebar
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the final uploader_* values, so this must
    # run after the fallback above
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id,
            self._extract_identity_token(webpage, item_id),
            self._extract_account_syncid(data),
            self._extract_ytcfg(item_id, webpage)),
        **metadata)
73c4ac2c 3445
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages as needed.

    Each page is requested from the ``next`` endpoint, seeded with the last
    video of the current page. Iteration stops when a page has no videos,
    brings nothing new after the last video already yielded, contains the
    very first video again, or when the response holds no playlist.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item is not guaranteed to be a playlistPanelVideoRenderer;
        # fall back to an empty dict so the defaults below apply
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No playlist in the response: nothing more to page through
            return
cd7c66cf 3484
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a watch-page playlist panel.

    When the panel points at a playlist URL different from *url*, the work
    is delegated to that URL; otherwise the panel is extracted as a mix
    playlist.
    """
    playlist_title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    canonical_url = urljoin(url, endpoint_path)

    if not canonical_url or canonical_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=playlist_title)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    return self.url_result(
        canonical_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=playlist_title)
c5e8d7af 3502
@staticmethod
def _extract_alerts(data):
    """Yield ``(alert_type, message)`` for every alert in ``data['alerts']``.

    The message is the alert's ``text.simpleText`` when present, otherwise
    the concatenation of its ``text.runs`` texts. Each alert is yielded at
    most once; alerts without a type or without any text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if not message:
                runs = try_get(alert, lambda x: x['text']['runs'], list) or []
                # Runs without a string 'text' contribute nothing
                message = ''.join(
                    try_get(run, lambda x: x['text'], compat_str) or ''
                    for run in runs)
            if message:
                yield alert_type, message
3519
3520 def _report_alerts(self, alerts, expected=True):
3ffc7c89 3521 errors = []
3522 warnings = []
95c01b6c 3523 for alert_type, alert_message in alerts:
f3eaa8dd 3524 if alert_type.lower() == 'error':
3ffc7c89 3525 errors.append([alert_type, alert_message])
f3eaa8dd 3526 else:
3ffc7c89 3527 warnings.append([alert_type, alert_message])
f3eaa8dd 3528
3ffc7c89 3529 for alert_type, alert_message in (warnings + errors[:-1]):
6a39ee13 3530 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
3ffc7c89 3531 if errors:
3532 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3533
95c01b6c 3534 def _extract_and_report_alerts(self, data, *args, **kwargs):
3535 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
3536
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again through the API so that unavailable videos are included.

    The browse id and params are taken from the 'show unavailable videos'
    menu item of the playlist sidebar when it exists; otherwise defaults
    are used, so the request is made either way as long as the sidebar has
    items.

    Returns the API response, or None when the page has no playlist sidebar
    items or the (non-fatal) request did not produce a usable response.
    """
    sidebar_renderer = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_renderer:
        return
    browse_id = params = None
    for item in sidebar_renderer:
        if not isinstance(item, dict):
            continue
        renderer = item.get('playlistSidebarPrimaryInfoRenderer')
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            # Only leaves the menu loop; later sidebar items are still scanned
            break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    # NOTE(review): the other callers in this class pass `data` to
    # _extract_account_syncid(); confirm that it also accepts ytcfg
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    # Defaults used when the menu item was not found
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3580
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint *ep* and return its response, retrying on flaky results.

    A request is retried (up to the ``extractor_retries`` param, default 3)
    when it fails with HTTP 500, 503 or 404, or when the response contains
    none of *check_get_keys*. Alerts in the response are reported and may
    raise.

    With ``fatal=True`` failures raise ExtractorError; with ``fatal=False``
    they are reported as a warning and None is returned.
    """
    response = None
    last_error = None
    # Starts at -1 so that the first attempt is number 0 and retries are 1..retries
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if count < retries:
                    continue
            # Either not a retryable error or the retries are used up
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
3634
def _extract_webpage(self, url, item_id):
    """Download *url* and return ``(webpage, data)``, retrying on incomplete data.

    The page is downloaded again (up to the ``extractor_retries`` param,
    default 3) while its initial data has neither ``contents`` nor
    ``currentVideoEndpoint``. Alerts are only reported for such incomplete
    pages. Raises ExtractorError once the retries are exhausted.
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self._extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
3656
9297939e 3657 @staticmethod
3658 def _smuggle_data(entries, data):
3659 for entry in entries:
3660 if data:
3661 entry['url'] = smuggle_url(entry['url'], data)
3662 yield entry
3663
def _real_extract(self, url):
    """Extract *url* and propagate any smuggled data to the resulting entries.

    Music URLs are flagged with ``is_music_url`` in the smuggled data before
    the actual extraction runs.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True

    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
3672
# Splits a URL into `pre` (the part matched by _VALID_URL), an optional
# `tab` path segment and `post` (everything after it). The conditional group
# `(?(channel_type)...)` only attempts to match a tab when the
# `channel_type` group of _VALID_URL took part in the match.
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
3674
def __real_extract(self, url, smuggled_data):
    """Normalize *url*, download the page and dispatch to the matching extractor.

    The URL is first rewritten (host forced to www.youtube.com, music
    channel ids mapped to their playlist, bare channel URLs redirected to
    ``/videos`` unless the ``no-youtube-channel-redirect`` compat option is
    set). The downloaded data is then handled, in order of preference, as a
    tabbed page, a watch-page playlist, or a single video.

    Raises ExtractorError when the page cannot be recognized, or when a
    ``/live`` tab did not redirect to a video.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Groups that did not take part in the match become '' rather than None
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            # Case-insensitive, consistent with _report_alerts()
                            if alert_type.lower() == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    # `data` may have been replaced above, so the tabs are looked up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3790
c5e8d7af 3791
class YoutubePlaylistIE(InfoExtractor):
    """Match playlist URLs and bare playlist ids, and delegate them to YoutubeTabIE."""
    IE_DESC = 'YouTube.com playlists'
    # The whole URL prefix is optional, so a bare playlist id matches as well
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs YoutubeTabIE handles or that carry a video id (``v``)."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rebuild *url* as a canonical /playlist URL and hand it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string if there is one; a bare id has none
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 3874
3875
class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be short links that carry a ``list`` parameter into watch URLs."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the equivalent watch URL (video and playlist ids kept)."""
        groups = re.match(self._VALID_URL, url).groupdict()
        playlist_id = groups['playlist_id']
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': groups['id'],
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3914
3915
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to a youtube.com/user/ URL."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [
        {
            'url': 'ytuser:phihag',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate the user's page URL to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3929
b05654f0 3930
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` shorthand to the ``LL`` playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [
        {
            'url': ':ytfav',
            'only_matching': True,
        },
        {
            'url': ':ytfavorites',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate the fixed ``list=LL`` playlist URL to YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3948
3949
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor behind the ``ytsearch`` keyword."""
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional value sent as the ``params`` field of the search request;
    # subclasses override it.
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Lazily yield up to *n* video results for *query*.

        Result pages are requested one at a time. Iteration ends when a
        request returns nothing, a page has no result sections, a page
        carries no continuation token, or *n* results have been yielded.
        """
        payload = {'query': query}
        if self._SEARCH_PARAMS:
            payload['params'] = self._SEARCH_PARAMS

        yielded = 0
        for page in itertools.count(1):
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page),
                ep='search', query=payload,
                check_get_keys=('contents', 'onResponseReceivedCommands'))
            if not response:
                return

            # The first lambda matches the initial page layout, the second
            # one the layout of continuation responses.
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    # Skip anything that is not a video entry with an id
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            payload['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 4019
c9ae7b95 4020
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE behind the ``ytsearchdate`` keyword."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the ``params`` field of the search request; presumably the
    # encoded "sort by upload date" filter -- TODO confirm
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4026
c9ae7b95 4027
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Handle youtube.com/results search URLs via the search extractor."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    @classmethod
    def _make_valid_url(cls):
        """Return the explicit ``_VALID_URL`` pattern of this class."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by the URL's query string.

        The search text comes from ``search_query`` (or ``q``); the ``sp``
        parameter, when present, is stored as ``_SEARCH_PARAMS`` on this
        instance before the results are fetched.
        """
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4053
4054
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name, built from the subclass's ``_FEED_NAME``."""
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Delegate the youtube.com/feed/<name> URL to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4071
4072
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` shorthand to the ``WL`` playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate the fixed ``list=WL`` playlist URL to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4085
4086
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the bare youtube.com front page and ``:ytrec``."""
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    # Overrides the True value set on the feed base class
    _LOGIN_REQUIRED = False
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]
1ed5b5c9 4102
1ed5b5c9 4103
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ytsubs`` shorthand."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]
1ed5b5c9 4115
1ed5b5c9 4116
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ythis`` / ``:ythistory`` shorthand."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]
1ed5b5c9
JMF
4125
4126
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their ``v`` parameter.

    The pattern matches youtube.com/watch URLs that end right after at most
    one of a few non-video parameters (or an ``attribution_link?a=...``
    URL). Extraction always fails with a hint about quoting the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        # The example commands name this project's own executable (yt-dlp);
        # the previous text told users to run a different program.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4174
4175
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than 11 characters.

    Extraction always fails with a message saying the URL looks truncated.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the partial id."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)