]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube:tab] Redirect `UC` channels that doesn't have a `videos` tab
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Return the query component of *url* parsed into a dict of value lists."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
de7f3446 60class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
61 """Provide base functions for Youtube extractors"""
# Page fetched by _login() to obtain the hidden form inputs of the login form
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
# NOTE(review): not referenced by the login code visible in this part of the file
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

# Endpoints POSTed to by _login(): account lookup, password challenge and
# TFA code submission ({0} is filled with the "TL" value from the challenge response)
_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# Regex alternation of path names; presumably the first path components that
# must not be mistaken for a channel/user name - verify against the users of this constant
_RESERVED_NAMES = (
    r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|'
    r'movies|results|shared|hashtag|trending|feed|feeds|oembed|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

# Playlist ID: a known prefix followed by at least 10 ID characters,
# or one of the fixed IDs RDMM, WL, LL, LM
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 79
def _login(self):
    """
    Attempt to log in to YouTube.

    True is returned if login succeeded or was skipped (no username given).
    False is returned if any step of the login flow failed.

    If _LOGIN_REQUIRED is set and neither login info nor a cookie file was
    provided, an ExtractorError is raised.
    """
    username, password = self._get_login_info()
    # No authentication to be performed
    if username is None:
        if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
        # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
        #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
        return True

    login_page = self._download_webpage(
        self._LOGIN_URL, None,
        note='Downloading login page',
        errnote='unable to fetch login page', fatal=False)
    if login_page is False:
        # Explicit False (not a bare return) so every failure path honours the documented contract
        return False

    login_form = self._hidden_inputs(login_page)

    # The lookup and the challenge request both embed this same URL
    service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

    def req(url, f_req, note, errnote):
        """POST the login form plus the JSON-encoded f_req to url and return the parsed JSON reply."""
        data = login_form.copy()
        data.update({
            'pstMsg': 1,
            'checkConnection': 'youtube',
            'checkedDomains': 'youtube',
            'hl': 'en',
            'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
            'f.req': json.dumps(f_req),
            'flowName': 'GlifWebSignIn',
            'flowEntry': 'ServiceLogin',
            # TODO: reverse actual botguard identifier generation algo
            'bgRequest': '["identifier",""]',
        })
        return self._download_json(
            url, None, note=note, errnote=errnote,
            # Drop everything that precedes the first '[' of the response
            transform_source=lambda s: re.sub(r'^[^[]*', '', s),
            fatal=False,
            data=urlencode_postdata(data), headers={
                'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                'Google-Accounts-XSRF': 1,
            })

    def warn(message):
        self.report_warning(message)

    lookup_req = [
        username,
        None, [], None, 'US', None, None, 2, False, True,
        [
            None, None,
            [2, 1, None, 1, service_login_url, None, [], 4],
            1, [None, None, []], None, None, None, True
        ],
        username,
    ]

    lookup_results = req(
        self._LOOKUP_URL, lookup_req,
        'Looking up account info', 'Unable to look up account info')

    if lookup_results is False:
        return False

    user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
    if not user_hash:
        warn('Unable to extract user hash')
        return False

    challenge_req = [
        user_hash,
        None, 1, None, [1, None, None, None, [password, None, True]],
        [
            None, None, [2, 1, None, 1, service_login_url, None, [], 4],
            1, [None, None, []], None, None, None, True
        ]]

    challenge_results = req(
        self._CHALLENGE_URL, challenge_req,
        'Logging in', 'Unable to log in')

    if challenge_results is False:
        return False

    login_res = try_get(challenge_results, lambda x: x[0][5], list)
    if login_res:
        login_msg = try_get(login_res, lambda x: x[5], compat_str)
        # Parenthesised: '%' binds tighter than the conditional expression,
        # so without the parentheses the prefix is lost for every other message
        warn('Unable to login: %s' % (
            'Invalid password' if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
        return False

    res = try_get(challenge_results, lambda x: x[0][-1], list)
    if not res:
        warn('Unable to extract result entry')
        return False

    login_challenge = try_get(res, lambda x: x[0][0], list)
    if login_challenge:
        challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
        if challenge_str == 'TWO_STEP_VERIFICATION':
            # SEND_SUCCESS - TFA code has been successfully sent to phone
            # QUOTA_EXCEEDED - reached the limit of TFA codes
            status = try_get(login_challenge, lambda x: x[5], compat_str)
            if status == 'QUOTA_EXCEEDED':
                warn('Exceeded the limit of TFA codes, try later')
                return False

            tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
            if not tl:
                warn('Unable to extract TL')
                return False

            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                warn(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            tfa_code = remove_start(tfa_code, 'G-')

            tfa_req = [
                user_hash, None, 2, None,
                [
                    9, None, None, None, None, None, None, None,
                    [None, tfa_code, True, 2]
                ]]

            tfa_results = req(
                self._TFA_URL.format(tl), tfa_req,
                'Submitting TFA code', 'Unable to submit TFA code')

            if tfa_results is False:
                return False

            tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
            if tfa_res:
                tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                # Same precedence fix as for the password warning above
                warn('Unable to finish TFA: %s' % (
                    'Invalid TFA code' if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                return False

            check_cookie_url = try_get(
                tfa_results, lambda x: x[0][-1][2], compat_str)
        else:
            CHALLENGES = {
                'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
            }
            challenge = CHALLENGES.get(
                challenge_str,
                '%s returned error %s.' % (self.IE_NAME, challenge_str))
            warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
            return False
    else:
        check_cookie_url = try_get(res, lambda x: x[2], compat_str)

    if not check_cookie_url:
        warn('Unable to extract CheckCookie URL')
        return False

    check_cookie_results = self._download_webpage(
        check_cookie_url, None, 'Checking cookie', fatal=False)

    if check_cookie_results is False:
        return False

    if 'https://myaccount.google.com/' not in check_cookie_results:
        warn('Unable to log in')
        return False

    return True
264
def _initialize_consent(self):
    """
    Set an accepted CONSENT cookie for .youtube.com.

    Nothing is done when a __Secure-3PSID cookie is present or when the
    existing CONSENT cookie already contains YES. The numeric ID of a
    PENDING consent cookie is reused if there is one; otherwise a random
    three-digit ID is used.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 279
def _real_initialize(self):
    """Set up the consent cookie, then attempt a login when a downloader is attached."""
    self._initialize_consent()
    if self._downloader is not None:
        # The outcome of the login attempt is not acted upon here
        self._login()
c5e8d7af 286
# Fallback client version, used when ytcfg has no INNERTUBE_CLIENT_VERSION
_YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
# Fallback API key, used when ytcfg has no INNERTUBE_API_KEY
_YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
# Matches the `ytInitialData = {...};` assignment; group 1 is the JSON object
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
# Matches the `ytInitialPlayerResponse = {...};` assignment; group 1 is the JSON object
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text expected right after the JSON object; _extract_yt_initial_data appends
# it to _YT_INITIAL_DATA_RE for the first pattern it tries
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 292
a5c56234
M
def _generate_sapisidhash_header(self):
    """
    Build the SAPISIDHASH authorization value from the SAPISID cookie.

    Returns None when there is no SAPISID cookie for www.youtube.com,
    otherwise "SAPISIDHASH <timestamp>_<sha1 hex digest>", where the digest
    covers "<timestamp> <SAPISID value> <origin>".
    """
    origin = 'https://www.youtube.com'
    sapisid_cookie = self._get_cookies(origin).get('SAPISID')
    if sapisid_cookie is None:
        return None
    timestamp = round(time.time())
    digest_input = ' '.join((str(timestamp), sapisid_cookie.value, origin))
    digest = hashlib.sha1(digest_input.encode('utf-8')).hexdigest()
    return 'SAPISIDHASH %s_%s' % (timestamp, digest)
300
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None):
    """
    POST a JSON request to the youtubei/v1 endpoint `ep` and return the parsed reply.

    The request body is {'context': ...} updated with `query`; the context
    and API key default to _extract_context() and _extract_api_key().
    Entries in `headers` override the generated API headers.
    """
    payload = {'context': context or self._extract_context()}
    payload.update(query)

    request_headers = self._generate_api_headers()
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    endpoint_url = 'https://www.youtube.com/youtubei/v1/%s' % ep
    return self._download_json(
        endpoint_url,
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key or self._extract_api_key()})
316
def _extract_api_key(self, ytcfg=None):
    """Return the INNERTUBE_API_KEY string from ytcfg, or the built-in default key."""
    key_from_cfg = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_API_KEY'], compat_str)
    if key_from_cfg:
        return key_from_cfg
    return self._YT_INNERTUBE_API_KEY
c54f4aad 319
def _extract_yt_initial_data(self, video_id, webpage):
    """Locate the ytInitialData JSON object in webpage and return it parsed."""
    # Try the pattern anchored by the trailing boundary first, then the bare one
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE,
    )
    raw_json = self._search_regex(patterns, webpage, 'yt initial data')
    return self._parse_json(raw_json, video_id)
0c148415 326
a1c5d2ca
M
def _extract_identity_token(self, webpage, item_id):
    """
    Return the ID_TOKEN string for webpage, or None.

    The parsed ytcfg is consulted first; if it yields no token, the raw
    page is searched with a regex.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    cfg_token = try_get(ytcfg, lambda cfg: cfg['ID_TOKEN'], compat_str) if ytcfg else None
    if cfg_token:
        return cfg_token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
336
@staticmethod
def _extract_account_syncid(data):
    """
    Extract syncId required to download private playlists of secondary channels
    @param data Either response or ytcfg

    Returns the channel sync ID, the DELEGATED_SESSION_ID value, or None
    when neither is available (including when data is None or not a dict).
    """
    sync_ids = (try_get(
        data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
               lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
    if len(sync_ids) >= 2 and sync_ids[1]:
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return sync_ids[0]
    # ytcfg includes channel_syncid if on secondary channel.
    # Looked up through try_get so that a missing/non-dict `data` gives None
    # instead of raising AttributeError, like the lookup above already does.
    return try_get(data, lambda x: x['DELEGATED_SESSION_ID'])
a1c5d2ca 352
def _extract_ytcfg(self, video_id, webpage):
    """Return the object passed to ytcfg.set(...) in webpage as a dict, or {} if unavailable."""
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed_cfg = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed_cfg or {}
360
def __extract_client_version(self, ytcfg):
    """Return the INNERTUBE_CLIENT_VERSION string from ytcfg, or the built-in default version."""
    cfg_version = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'], compat_str)
    return cfg_version if cfg_version else self._YT_WEB_CLIENT_VERSION
363
def _extract_context(self, ytcfg=None):
    """
    Return the client context dict to send with API requests.

    A non-empty INNERTUBE_CONTEXT dict from ytcfg is returned as-is.
    Otherwise a minimal context is built from the client name (default
    'WEB'), the client version and, when present, VISITOR_DATA.
    """
    ready_made = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_CONTEXT'], dict)
    if ready_made:
        return ready_made

    # Recreate the client context (required)
    client = {
        'clientName': try_get(ytcfg, lambda cfg: cfg['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB',
        'clientVersion': self.__extract_client_version(ytcfg),
    }
    visitor_data = try_get(ytcfg, lambda cfg: cfg['VISITOR_DATA'], compat_str)
    if visitor_data:
        client['visitorData'] = visitor_data
    return {'client': client}
382
def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
    """
    Build the HTTP headers for an API request.

    The client name and version headers are always set. The identity token,
    account sync ID (together with X-Goog-AuthUser) and visitor data
    headers are added only for truthy arguments. Authorization and
    X-Origin are added when a SAPISIDHASH value can be generated.
    """
    headers = {
        'X-YouTube-Client-Name': '1',
        'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
    }
    if identity_token:
        headers['x-youtube-identity-token'] = identity_token
    if account_syncid:
        headers.update({
            'X-Goog-PageId': account_syncid,
            'X-Goog-AuthUser': 0,
        })
    if visitor_data:
        headers['x-goog-visitor-id'] = visitor_data

    sapisidhash = self._generate_sapisidhash_header()
    if sapisidhash is not None:
        headers.update({
            'Authorization': sapisidhash,
            'X-Origin': 'https://www.youtube.com',
        })
    return headers
29f7c58a 400
9297939e 401 @staticmethod
402 def is_music_url(url):
403 return re.match(r'https?://music\.youtube\.com/', url) is not None
404
def _extract_video(self, renderer):
    """
    Convert a video renderer dict into a 'url'-type result for YoutubeIE.

    Title, description, duration, view count and uploader are read from the
    renderer when present; fields that cannot be extracted are left as None.
    """
    video_id = renderer.get('videoId')

    title = try_get(
        renderer,
        (lambda r: r['title']['runs'][0]['text'],
         lambda r: r['title']['simpleText']), compat_str)
    description = try_get(
        renderer, lambda r: r['descriptionSnippet']['runs'][0]['text'],
        compat_str)

    length_text = try_get(
        renderer, lambda r: r['lengthText']['simpleText'], compat_str)
    duration = parse_duration(length_text)

    # The count is the leading run of digits/commas once all whitespace is removed
    view_count_text = try_get(
        renderer, lambda r: r['viewCountText']['simpleText'], compat_str) or ''
    compact_count_text = re.sub(r'\s', '', view_count_text)
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', compact_count_text,
        'view count', default=None))

    uploader = try_get(
        renderer,
        (lambda r: r['ownerText']['runs'][0]['text'],
         lambda r: r['shortBylineText']['runs'][0]['text']), compat_str)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
436
0c148415 437
360e1ca5 438class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 439 IE_DESC = 'YouTube.com'
bc2ca1bb 440 _INVIDIOUS_SITES = (
441 # invidious-redirect websites
442 r'(?:www\.)?redirect\.invidious\.io',
443 r'(?:(?:www|dev)\.)?invidio\.us',
444 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
445 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 446 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 447 r'(?:(?:www|au)\.)?ytprivate\.com',
448 r'(?:www\.)?invidious\.namazso\.eu',
449 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 450 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
451 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
452 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
453 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
454 # youtube-dl invidious instances list
455 r'(?:(?:www|no)\.)?invidiou\.sh',
456 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
457 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 458 r'(?:www\.)?invidious\.mastodon\.host',
459 r'(?:www\.)?invidious\.zapashcanon\.fr',
460 r'(?:www\.)?invidious\.kavin\.rocks',
201c1459 461 r'(?:www\.)?invidious\.tinfoil-hat\.net',
462 r'(?:www\.)?invidious\.himiko\.cloud',
463 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 464 r'(?:www\.)?invidious\.tube',
465 r'(?:www\.)?invidiou\.site',
466 r'(?:www\.)?invidious\.site',
467 r'(?:www\.)?invidious\.xyz',
468 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 469 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 470 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 471 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 472 r'(?:www\.)?tube\.poal\.co',
473 r'(?:www\.)?tube\.connect\.cafe',
474 r'(?:www\.)?vid\.wxzm\.sx',
475 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 476 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 477 r'(?:www\.)?yewtu\.be',
478 r'(?:www\.)?yt\.elukerio\.org',
479 r'(?:www\.)?yt\.lelux\.fi',
480 r'(?:www\.)?invidious\.ggc-project\.de',
481 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 482 r'(?:www\.)?ytprivate\.com',
483 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 484 r'(?:www\.)?invidious\.toot\.koeln',
485 r'(?:www\.)?invidious\.fdn\.fr',
486 r'(?:www\.)?watch\.nettohikari\.com',
487 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
488 r'(?:www\.)?qklhadlycap4cnod\.onion',
489 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
490 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
491 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
492 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
493 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
494 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
495 )
cb7dfeea 496 _VALID_URL = r"""(?x)^
c5e8d7af 497 (
edb53e2d 498 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 499 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
500 (?:www\.)?deturl\.com/www\.youtube\.com|
501 (?:www\.)?pwnyoutube\.com|
502 (?:www\.)?hooktube\.com|
503 (?:www\.)?yourepeat\.com|
504 tube\.majestyc\.net|
505 %(invidious)s|
506 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
507 (?:.*?\#/)? # handle anchor (#/) redirect urls
508 (?: # the various things that can precede the ID:
ac7553d0 509 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 510 |(?: # or the v= param in all its forms
f7000f3a 511 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 512 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 513 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
514 v=
515 )
f4b05232 516 ))
cbaed4bb
S
517 |(?:
518 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
519 vid\.plus| # or vid.plus/xxxx
520 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 521 %(invidious)s
cbaed4bb 522 )/
edb53e2d 523 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 524 )
c5e8d7af 525 )? # all until now is optional -> you can pass the naked ID
201c1459 526 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 527 (?(1).+)? # if we found the ID, everything can follow
9297939e 528 (?:\#|$)""" % {
bc2ca1bb 529 'invidious': '|'.join(_INVIDIOUS_SITES),
530 }
e40c758c 531 _PLAYER_INFO_RE = (
cc2db878 532 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
533 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 534 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 535 )
2c62dc26 536 _formats = {
c2d3cb4c 537 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
538 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
539 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
540 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
541 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
542 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
543 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
544 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 545 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 546 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
547 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
548 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
549 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
550 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
551 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 552 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 553 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
554 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 555
556
557 # 3D videos
c2d3cb4c 558 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
559 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
560 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
561 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 562 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
563 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
564 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 565
96fb5605 566 # Apple HTTP Live Streaming
11f12195 567 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 568 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
569 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
570 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
571 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
572 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 573 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
574 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
575
576 # DASH mp4 video
d23028a8
S
577 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
578 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
579 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
580 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
581 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 582 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
583 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
584 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
585 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
586 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
587 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
588 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 589
f6f1fc92 590 # Dash mp4 audio
d23028a8
S
591 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
592 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
593 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
594 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
595 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
596 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
597 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
598
599 # Dash webm
d23028a8
S
600 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
601 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
602 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
603 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
604 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
605 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
606 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
607 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
608 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
609 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
610 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
611 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
612 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
613 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
614 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 615 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
616 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
617 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
618 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
619 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
620 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
621 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
622
623 # Dash webm audio
d23028a8
S
624 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
625 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 626
0857baad 627 # Dash webm audio with opus inside
d23028a8
S
628 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
629 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
630 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 631
ce6b9a2d
PH
632 # RTMP (unnamed)
633 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
634
635 # av01 video only formats sometimes served with "unknown" codecs
636 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
637 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
638 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
639 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 640 }
29f7c58a 641 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 642
fd5c4aab
S
643 _GEO_BYPASS = False
644
78caa52a 645 IE_NAME = 'youtube'
2eb88d95
PH
646 _TESTS = [
647 {
2d3d2997 648 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
649 'info_dict': {
650 'id': 'BaW_jenozKc',
651 'ext': 'mp4',
3867038a 652 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
653 'uploader': 'Philipp Hagemeister',
654 'uploader_id': 'phihag',
ec85ded8 655 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
656 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
657 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 658 'upload_date': '20121002',
3867038a 659 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 660 'categories': ['Science & Technology'],
3867038a 661 'tags': ['youtube-dl'],
556dbe7f 662 'duration': 10,
dbdaaa23 663 'view_count': int,
3e7c1224
PH
664 'like_count': int,
665 'dislike_count': int,
7c80519c 666 'start_time': 1,
297a564b 667 'end_time': 9,
2eb88d95 668 }
0e853ca4 669 },
fccd3771 670 {
4bc3a23e
PH
671 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
672 'note': 'Embed-only video (#1746)',
673 'info_dict': {
674 'id': 'yZIXLfi8CZQ',
675 'ext': 'mp4',
676 'upload_date': '20120608',
677 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
678 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
679 'uploader': 'SET India',
94bfcd23 680 'uploader_id': 'setindia',
ec85ded8 681 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 682 'age_limit': 18,
545cc85d 683 },
684 'skip': 'Private video',
fccd3771 685 },
11b56058 686 {
8bdd16b4 687 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
688 'note': 'Use the first video ID in the URL',
689 'info_dict': {
690 'id': 'BaW_jenozKc',
691 'ext': 'mp4',
3867038a 692 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
693 'uploader': 'Philipp Hagemeister',
694 'uploader_id': 'phihag',
ec85ded8 695 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 696 'upload_date': '20121002',
3867038a 697 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 698 'categories': ['Science & Technology'],
3867038a 699 'tags': ['youtube-dl'],
556dbe7f 700 'duration': 10,
dbdaaa23 701 'view_count': int,
11b56058
PM
702 'like_count': int,
703 'dislike_count': int,
34a7de29
S
704 },
705 'params': {
706 'skip_download': True,
707 },
11b56058 708 },
dd27fd17 709 {
2d3d2997 710 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
711 'note': '256k DASH audio (format 141) via DASH manifest',
712 'info_dict': {
713 'id': 'a9LDPn-MO4I',
714 'ext': 'm4a',
715 'upload_date': '20121002',
716 'uploader_id': '8KVIDEO',
ec85ded8 717 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
718 'description': '',
719 'uploader': '8KVIDEO',
720 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 721 },
4bc3a23e
PH
722 'params': {
723 'youtube_include_dash_manifest': True,
724 'format': '141',
4919603f 725 },
de3c7fe0 726 'skip': 'format 141 not served anymore',
dd27fd17 727 },
8bdd16b4 728 # DASH manifest with encrypted signature
729 {
730 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
731 'info_dict': {
732 'id': 'IB3lcPjvWLA',
733 'ext': 'm4a',
734 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
735 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
736 'duration': 244,
737 'uploader': 'AfrojackVEVO',
738 'uploader_id': 'AfrojackVEVO',
739 'upload_date': '20131011',
cc2db878 740 'abr': 129.495,
8bdd16b4 741 },
742 'params': {
743 'youtube_include_dash_manifest': True,
744 'format': '141/bestaudio[ext=m4a]',
745 },
746 },
aa79ac0c
PH
747 # Controversy video
748 {
749 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
750 'info_dict': {
751 'id': 'T4XJQO3qol8',
752 'ext': 'mp4',
556dbe7f 753 'duration': 219,
aa79ac0c 754 'upload_date': '20100909',
4fe54c12 755 'uploader': 'Amazing Atheist',
aa79ac0c 756 'uploader_id': 'TheAmazingAtheist',
ec85ded8 757 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 758 'title': 'Burning Everyone\'s Koran',
545cc85d 759 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 760 }
c522adb1 761 },
dd2d55f1 762 # Normal age-gate video (embed allowed)
c522adb1 763 {
2d3d2997 764 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
765 'info_dict': {
766 'id': 'HtVdAasjOgU',
767 'ext': 'mp4',
768 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 769 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 770 'duration': 142,
c522adb1
JMF
771 'uploader': 'The Witcher',
772 'uploader_id': 'WitcherGame',
ec85ded8 773 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 774 'upload_date': '20140605',
34952f09 775 'age_limit': 18,
c522adb1
JMF
776 },
777 },
8bdd16b4 778 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
779 # YouTube Red ad is not captured for creator
780 {
781 'url': '__2ABJjxzNo',
782 'info_dict': {
783 'id': '__2ABJjxzNo',
784 'ext': 'mp4',
785 'duration': 266,
786 'upload_date': '20100430',
787 'uploader_id': 'deadmau5',
788 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 789 'creator': 'deadmau5',
790 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 791 'uploader': 'deadmau5',
792 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 793 'alt_title': 'Some Chords',
8bdd16b4 794 },
795 'expected_warnings': [
796 'DASH manifest missing',
797 ]
798 },
067aa17e 799 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
800 {
801 'url': 'lqQg6PlCWgI',
802 'info_dict': {
803 'id': 'lqQg6PlCWgI',
804 'ext': 'mp4',
556dbe7f 805 'duration': 6085,
90227264 806 'upload_date': '20150827',
cbe2bd91 807 'uploader_id': 'olympic',
ec85ded8 808 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 809 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 810 'uploader': 'Olympic',
cbe2bd91
PH
811 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
812 },
813 'params': {
814 'skip_download': 'requires avconv',
e52a40ab 815 }
cbe2bd91 816 },
6271f1ca
PH
817 # Non-square pixels
818 {
819 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
820 'info_dict': {
821 'id': '_b-2C3KPAM0',
822 'ext': 'mp4',
823 'stretched_ratio': 16 / 9.,
556dbe7f 824 'duration': 85,
6271f1ca
PH
825 'upload_date': '20110310',
826 'uploader_id': 'AllenMeow',
ec85ded8 827 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 828 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 829 'uploader': '孫ᄋᄅ',
6271f1ca
PH
830 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
831 },
06b491eb
S
832 },
833 # url_encoded_fmt_stream_map is empty string
834 {
835 'url': 'qEJwOuvDf7I',
836 'info_dict': {
837 'id': 'qEJwOuvDf7I',
f57b7835 838 'ext': 'webm',
06b491eb
S
839 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
840 'description': '',
841 'upload_date': '20150404',
842 'uploader_id': 'spbelect',
843 'uploader': 'Наблюдатели Петербурга',
844 },
845 'params': {
846 'skip_download': 'requires avconv',
e323cf3f
S
847 },
848 'skip': 'This live event has ended.',
06b491eb 849 },
067aa17e 850 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
851 {
852 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
853 'info_dict': {
854 'id': 'FIl7x6_3R5Y',
eb6793ba 855 'ext': 'webm',
da77d856
S
856 'title': 'md5:7b81415841e02ecd4313668cde88737a',
857 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 858 'duration': 220,
da77d856
S
859 'upload_date': '20150625',
860 'uploader_id': 'dorappi2000',
ec85ded8 861 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 862 'uploader': 'dorappi2000',
eb6793ba 863 'formats': 'mincount:31',
da77d856 864 },
eb6793ba 865 'skip': 'not actual anymore',
2ee8f5d8 866 },
8a1a26ce
YCH
867 # DASH manifest with segment_list
868 {
869 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
870 'md5': '8ce563a1d667b599d21064e982ab9e31',
871 'info_dict': {
872 'id': 'CsmdDsKjzN8',
873 'ext': 'mp4',
17ee98e1 874 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
875 'uploader': 'Airtek',
876 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
877 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
878 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
879 },
880 'params': {
881 'youtube_include_dash_manifest': True,
882 'format': '135', # bestvideo
be49068d
S
883 },
884 'skip': 'This live event has ended.',
2ee8f5d8 885 },
cf7e015f
S
886 {
887 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 888 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 889 'info_dict': {
545cc85d 890 'id': 'jvGDaLqkpTg',
891 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
892 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
893 },
894 'playlist': [{
895 'info_dict': {
545cc85d 896 'id': 'jvGDaLqkpTg',
cf7e015f 897 'ext': 'mp4',
545cc85d 898 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
899 'description': 'md5:e03b909557865076822aa169218d6a5d',
900 'duration': 10643,
901 'upload_date': '20161111',
902 'uploader': 'Team PGP',
903 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
904 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
905 },
906 }, {
907 'info_dict': {
545cc85d 908 'id': '3AKt1R1aDnw',
cf7e015f 909 'ext': 'mp4',
545cc85d 910 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
911 'description': 'md5:e03b909557865076822aa169218d6a5d',
912 'duration': 10991,
913 'upload_date': '20161111',
914 'uploader': 'Team PGP',
915 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
916 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
917 },
918 }, {
919 'info_dict': {
545cc85d 920 'id': 'RtAMM00gpVc',
cf7e015f 921 'ext': 'mp4',
545cc85d 922 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
923 'description': 'md5:e03b909557865076822aa169218d6a5d',
924 'duration': 10995,
925 'upload_date': '20161111',
926 'uploader': 'Team PGP',
927 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
928 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
929 },
930 }, {
931 'info_dict': {
545cc85d 932 'id': '6N2fdlP3C5U',
cf7e015f 933 'ext': 'mp4',
545cc85d 934 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
935 'description': 'md5:e03b909557865076822aa169218d6a5d',
936 'duration': 10990,
937 'upload_date': '20161111',
938 'uploader': 'Team PGP',
939 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
940 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
941 },
942 }],
943 'params': {
944 'skip_download': True,
945 },
cbaed4bb 946 },
f9f49d87 947 {
067aa17e 948 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
949 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
950 'info_dict': {
951 'id': 'gVfLd0zydlo',
952 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
953 },
954 'playlist_count': 2,
be49068d 955 'skip': 'Not multifeed anymore',
f9f49d87 956 },
cbaed4bb 957 {
2d3d2997 958 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 959 'only_matching': True,
0e49d9a6 960 },
6d4fc66b 961 {
2d3d2997 962 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
963 'only_matching': True,
964 },
0e49d9a6 965 {
067aa17e 966 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 967 # Also tests cut-off URL expansion in video description (see
067aa17e
S
968 # https://github.com/ytdl-org/youtube-dl/issues/1892,
969 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
970 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
971 'info_dict': {
972 'id': 'lsguqyKfVQg',
973 'ext': 'mp4',
974 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 975 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 976 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 977 'duration': 133,
0e49d9a6
LL
978 'upload_date': '20151119',
979 'uploader_id': 'IronSoulElf',
ec85ded8 980 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 981 'uploader': 'IronSoulElf',
eb6793ba
S
982 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
983 'track': 'Dark Walk - Position Music',
984 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 985 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
986 },
987 'params': {
988 'skip_download': True,
989 },
990 },
61f92af1 991 {
067aa17e 992 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
993 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
994 'only_matching': True,
995 },
313dfc45
LL
996 {
997 # Video with yt:stretch=17:0
998 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
999 'info_dict': {
1000 'id': 'Q39EVAstoRM',
1001 'ext': 'mp4',
1002 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1003 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1004 'upload_date': '20151107',
1005 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1006 'uploader': 'CH GAMER DROID',
1007 },
1008 'params': {
1009 'skip_download': True,
1010 },
be49068d 1011 'skip': 'This video does not exist.',
313dfc45 1012 },
201c1459 1013 {
1014 # Video with incomplete 'yt:stretch=16:'
1015 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1016 'only_matching': True,
1017 },
7caf9830
S
1018 {
1019 # Video licensed under Creative Commons
1020 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1021 'info_dict': {
1022 'id': 'M4gD1WSo5mA',
1023 'ext': 'mp4',
1024 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1025 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1026 'duration': 721,
7caf9830
S
1027 'upload_date': '20150127',
1028 'uploader_id': 'BerkmanCenter',
ec85ded8 1029 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1030 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1031 'license': 'Creative Commons Attribution license (reuse allowed)',
1032 },
1033 'params': {
1034 'skip_download': True,
1035 },
1036 },
fd050249
S
1037 {
1038 # Channel-like uploader_url
1039 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1040 'info_dict': {
1041 'id': 'eQcmzGIKrzg',
1042 'ext': 'mp4',
1043 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1044 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1045 'duration': 4060,
fd050249 1046 'upload_date': '20151119',
eb6793ba 1047 'uploader': 'Bernie Sanders',
fd050249 1048 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1049 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1050 'license': 'Creative Commons Attribution license (reuse allowed)',
1051 },
1052 'params': {
1053 'skip_download': True,
1054 },
1055 },
040ac686
S
1056 {
1057 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1058 'only_matching': True,
7f29cf54
S
1059 },
1060 {
067aa17e 1061 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1062 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1063 'only_matching': True,
6496ccb4
S
1064 },
1065 {
1066 # Rental video preview
1067 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1068 'info_dict': {
1069 'id': 'uGpuVWrhIzE',
1070 'ext': 'mp4',
1071 'title': 'Piku - Trailer',
1072 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1073 'upload_date': '20150811',
1074 'uploader': 'FlixMatrix',
1075 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1076 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1077 'license': 'Standard YouTube License',
1078 },
1079 'params': {
1080 'skip_download': True,
1081 },
eb6793ba 1082 'skip': 'This video is not available.',
022a5d66 1083 },
12afdc2a
S
1084 {
1085 # YouTube Red video with episode data
1086 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1087 'info_dict': {
1088 'id': 'iqKdEhx-dD4',
1089 'ext': 'mp4',
1090 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1091 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1092 'duration': 2085,
12afdc2a
S
1093 'upload_date': '20170118',
1094 'uploader': 'Vsauce',
1095 'uploader_id': 'Vsauce',
1096 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1097 'series': 'Mind Field',
1098 'season_number': 1,
1099 'episode_number': 1,
1100 },
1101 'params': {
1102 'skip_download': True,
1103 },
1104 'expected_warnings': [
1105 'Skipping DASH manifest',
1106 ],
1107 },
c7121fa7
S
1108 {
1109 # The following content has been identified by the YouTube community
1110 # as inappropriate or offensive to some audiences.
1111 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1112 'info_dict': {
1113 'id': '6SJNVb0GnPI',
1114 'ext': 'mp4',
1115 'title': 'Race Differences in Intelligence',
1116 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1117 'duration': 965,
1118 'upload_date': '20140124',
1119 'uploader': 'New Century Foundation',
1120 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1121 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1122 },
1123 'params': {
1124 'skip_download': True,
1125 },
545cc85d 1126 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1127 },
022a5d66
S
1128 {
1129 # itag 212
1130 'url': '1t24XAntNCY',
1131 'only_matching': True,
fd5c4aab
S
1132 },
1133 {
1134 # geo restricted to JP
1135 'url': 'sJL6WA-aGkQ',
1136 'only_matching': True,
1137 },
cd5a74a2
S
1138 {
1139 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1140 'only_matching': True,
1141 },
bc2ca1bb 1142 {
1143 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1144 'only_matching': True,
1145 },
1146 {
1147 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1148 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1149 'only_matching': True,
1150 },
825cd268
RA
1151 {
1152 # DRM protected
1153 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1154 'only_matching': True,
4fe54c12
S
1155 },
1156 {
1157 # Video with unsupported adaptive stream type formats
1158 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1159 'info_dict': {
1160 'id': 'Z4Vy8R84T1U',
1161 'ext': 'mp4',
1162 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1163 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1164 'duration': 433,
1165 'upload_date': '20130923',
1166 'uploader': 'Amelia Putri Harwita',
1167 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1168 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1169 'formats': 'maxcount:10',
1170 },
1171 'params': {
1172 'skip_download': True,
1173 'youtube_include_dash_manifest': False,
1174 },
5429d6a9 1175 'skip': 'not actual anymore',
5caabd3c 1176 },
1177 {
822b9d9c 1178 # Youtube Music Auto-generated description
5caabd3c 1179 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1180 'info_dict': {
1181 'id': 'MgNrAu2pzNs',
1182 'ext': 'mp4',
1183 'title': 'Voyeur Girl',
1184 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1185 'upload_date': '20190312',
5429d6a9
S
1186 'uploader': 'Stephen - Topic',
1187 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1188 'artist': 'Stephen',
1189 'track': 'Voyeur Girl',
1190 'album': 'it\'s too much love to know my dear',
1191 'release_date': '20190313',
1192 'release_year': 2019,
1193 },
1194 'params': {
1195 'skip_download': True,
1196 },
1197 },
66b48727
RA
1198 {
1199 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1200 'only_matching': True,
1201 },
011e75e6
S
1202 {
1203 # invalid -> valid video id redirection
1204 'url': 'DJztXj2GPfl',
1205 'info_dict': {
1206 'id': 'DJztXj2GPfk',
1207 'ext': 'mp4',
1208 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1209 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1210 'upload_date': '20090125',
1211 'uploader': 'Prochorowka',
1212 'uploader_id': 'Prochorowka',
1213 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1214 'artist': 'Panjabi MC',
1215 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1216 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1217 },
1218 'params': {
1219 'skip_download': True,
1220 },
545cc85d 1221 'skip': 'Video unavailable',
ea74e00b
DP
1222 },
1223 {
1224 # empty description results in an empty string
1225 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1226 'info_dict': {
1227 'id': 'x41yOUIvK2k',
1228 'ext': 'mp4',
1229 'title': 'IMG 3456',
1230 'description': '',
1231 'upload_date': '20170613',
1232 'uploader_id': 'ElevageOrVert',
1233 'uploader': 'ElevageOrVert',
1234 },
1235 'params': {
1236 'skip_download': True,
1237 },
1238 },
a0566bbf 1239 {
29f7c58a 1240 # with '};' inside yt initial data (see [1])
1241 # see [2] for an example with '};' inside ytInitialPlayerResponse
1242 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1243 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1244 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1245 'info_dict': {
1246 'id': 'CHqg6qOn4no',
1247 'ext': 'mp4',
1248 'title': 'Part 77 Sort a list of simple types in c#',
1249 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1250 'upload_date': '20130831',
1251 'uploader_id': 'kudvenkat',
1252 'uploader': 'kudvenkat',
1253 },
1254 'params': {
1255 'skip_download': True,
1256 },
1257 },
29f7c58a 1258 {
1259 # another example of '};' in ytInitialData
1260 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1261 'only_matching': True,
1262 },
1263 {
1264 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1265 'only_matching': True,
1266 },
545cc85d 1267 {
cc2db878 1268 # https://github.com/ytdl-org/youtube-dl/pull/28094
1269 'url': 'OtqTfy26tG0',
1270 'info_dict': {
1271 'id': 'OtqTfy26tG0',
1272 'ext': 'mp4',
1273 'title': 'Burn Out',
1274 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1275 'upload_date': '20141120',
1276 'uploader': 'The Cinematic Orchestra - Topic',
1277 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1278 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1279 'artist': 'The Cinematic Orchestra',
1280 'track': 'Burn Out',
1281 'album': 'Every Day',
1282 'release_data': None,
1283 'release_year': None,
1284 },
1285 'params': {
1286 'skip_download': True,
1287 },
545cc85d 1288 },
bc2ca1bb 1289 {
1290 # controversial video, only works with bpctr when authenticated with cookies
1291 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1292 'only_matching': True,
1293 },
f7ad7160 1294 {
1295 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1296 'url': 'cBvYw8_A0vQ',
1297 'info_dict': {
1298 'id': 'cBvYw8_A0vQ',
1299 'ext': 'mp4',
1300 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1301 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1302 'upload_date': '20201120',
1303 'uploader': 'Walk around Japan',
1304 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1305 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1306 },
1307 'params': {
1308 'skip_download': True,
1309 },
0fb983f6 1310 }, {
1311 # Has multiple audio streams
1312 'url': 'WaOKSUlf4TM',
1313 'only_matching': True
9297939e 1314 }, {
1315 # Requires Premium: has format 141 when requested using YTM url
1316 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1317 'only_matching': True
1318 }, {
120916da 1319 # multiple subtitles with same lang_code
1320 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1321 'only_matching': True,
1322 },
2eb88d95
PH
1323 ]
1324
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected outright; every other URL is judged by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1334
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor, then create two empty caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache: downloaded player source, keyed by player id.
    # _player_cache: signature functions, keyed by
    # (player URL, signature cache id).
    self._code_cache, self._player_cache = {}, {}
e0df6211 1339
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the lengths of the dot-separated parts of a signature, joined by dots."""
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1343
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id found in *player_url*.

    The patterns in ``cls._PLAYER_INFO_RE`` are searched in order and the
    ``id`` group of the first one that matches is returned.

    Raises ExtractorError when none of the patterns matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        found = re.search(pattern, player_url)
        if found:
            return found.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c
S
1353
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature shaped like *example_sig*.

    The downloader cache is consulted first. On a miss the player code is
    downloaded (once per player id), the signature routine is extracted
    from it, and the character permutation it applies is stored in the
    downloader cache.
    """
    player_id = self._extract_player_info(player_url)
    sig_layout = self._signature_cache_id(example_sig)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, sig_layout)
    # The id doubles as a cache file name, so it must not contain any
    # path component.
    assert os.path.basename(func_id) == func_id

    cache = self._downloader.cache
    cache_spec = cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        # A cached spec is a list of source indices: output character k is
        # input character cache_spec[k].
        return lambda s: ''.join(s[i] for i in cache_spec)

    try:
        code = self._code_cache[player_id]
    except KeyError:
        code = self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    res = self._parse_sig_js(code)

    # Run the routine on a probe string whose k-th character has code
    # point k; the code points of the result are the permutation applied.
    probe = ''.join(compat_chr(n) for n in range(len(example_sig)))
    cache_spec = [ord(c) for c in res(probe)]

    cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res
1380
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    *func* is run on a probe string to recover the index permutation it
    applies to signatures shaped like *example_sig*. That permutation is
    rendered as a sum of ``s[...]`` index and slice expressions and sent
    to ``self.to_screen``.
    """

    def format_slice(first, last, stride):
        # Render one run of consecutive indices as a slice expression.
        lower = '' if first == 0 else str(first)
        stop = last + stride
        upper = ':' if stop < 0 else ':%d' % stop
        stride_part = '' if stride == 1 else ':%d' % stride
        return 's[%s%s%s]' % (lower, upper, stride_part)

    def gen_sig_code(idxs):
        # Walk neighbouring index pairs, folding runs that move by +1 or
        # -1 into slices and yielding the rest as single index lookups.
        stride = None
        # Placeholder only - run_start is assigned whenever stride is set
        run_start = '(Never used)'
        for cur, prev in zip(idxs[1:], idxs[:-1]):
            delta = cur - prev
            if stride is not None:
                if delta != stride:
                    # The current run ends at prev
                    yield format_slice(run_start, prev, stride)
                    stride = None
            elif delta in (-1, 1):
                stride, run_start = delta, prev
            else:
                yield 's[%d]' % prev
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield format_slice(run_start, cur, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    permutation = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(gen_sig_code(permutation))
    part_lengths = ', '.join(
        compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    template = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n')
    code = template % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1419
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable wrapping the signature function found in *jscode*.

    The name of the player's signature function is located with the
    regular expressions below (each captures it in the ``sig`` group),
    the function is extracted with JSInterpreter, and the returned
    callable passes its single string argument to it as a one-element
    argument list.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes a list of arguments.
    return lambda s: initial_function([s])
1443
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    Signature functions are cached per (player URL, signature cache id)
    in ``self._player_cache``.

    Raises ExtractorError when *player_url* is None, and also when
    obtaining or applying the signature function raises anything; in the
    latter case the message carries the formatted traceback.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Normalise protocol-relative and site-relative player URLs.
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(),
            cause=e)
e0df6211 1470
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in *player_response*.

    Nothing is done when the response has no usable
    ``playbackTracking.videostatsPlaybackUrl.baseUrl``. Otherwise the
    ``ver`` and ``cpn`` query parameters are set on that URL and it is
    fetched non-fatally.
    """
    tracking_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not tracking_url:
        return

    url_parts = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # "& 63" keeps every random index inside the 64-character alphabet.
    cpn_chars = [
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]
    cpn = ''.join(cpn_chars)

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(
        url_parts._replace(query=new_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1495
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embeds found in the HTML text *webpage*.

    Three kinds of markup are scanned, in this order:

    * player URLs in iframe/embed/object tags, ``data-video-url``
      attributes and SWFObject / ``embedSWF`` calls (HTML-unescaped),
    * ``data-youtube-id`` values of lazyYT elements (HTML-unescaped),
    * ``data-video_id`` values of the Wordpress "YouTube Video Importer"
      plugin.

    The result is a list of strings and may be empty. The last two kinds
    contribute attribute values rather than full URLs.
    """
    # Embedded YouTube player
    # The embedSWF branch takes an optional "(" and an optional ":", so
    # the call form embedSWF("...") matches as well as the embedSWF:
    # form; with a mandatory ":" the call form could never match.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:?\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a (q1, q2, video id) tuple; only the id is kept.
    entries.extend(m[-1] for m in matches)

    return entries
1527
1528 @staticmethod
1529 def _extract_url(webpage):
1530 urls = YoutubeIE._extract_urls(webpage)
1531 return urls[0] if urls else None
1532
97665381
PH
1533 @classmethod
1534 def extract_id(cls, url):
1535 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
c5e8d7af 1536 if mobj is None:
69ea8ca4 1537 raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af
PH
1538 video_id = mobj.group(2)
1539 return video_id
1540
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build the chapter list from the chaptered player bar in *data*.

    Each chapter starts at its own ``timeRangeStartMillis`` (passed through
    float_or_none with scale=1000) and ends where the next listed chapter
    starts; the last one ends at *duration*. Chapters whose start or end
    cannot be determined are left out.

    Returns a list of {'start_time', 'end_time', 'title'} dicts, or None
    when *data* holds no chapter list. *video_id* is not used here.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_of(item):
        millis = try_get(
            item, lambda x: x['chapterRenderer']['timeRangeStartMillis'], int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for index, item in enumerate(raw_chapters):
        begin = start_of(item)
        if begin is None:
            continue
        if index + 1 < total:
            finish = start_of(raw_chapters[index + 1])
        else:
            finish = duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                item, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1580
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find a JSON blob in *webpage* with *regex* and parse it.

    *regex* followed by ``_YT_INITIAL_BOUNDARY_RE`` is tried first, then
    *regex* on its own. When neither matches, '{}' is parsed instead.
    Parsing is non-fatal. *name* is the label handed to _search_regex.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1585
d92f5d5a 1586 @staticmethod
1587 def parse_time_text(time_text):
1588 """
1589 Parse the comment time text
1590 time_text is in the format 'X units ago (edited)'
1591 """
1592 time_text_split = time_text.split(' ')
1593 if len(time_text_split) >= 3:
1594 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1595
a1c5d2ca
M
1596 @staticmethod
1597 def _join_text_entries(runs):
1598 text = None
1599 for run in runs:
1600 if not isinstance(run, dict):
1601 continue
1602 sub_text = try_get(run, lambda x: x['text'], compat_str)
1603 if sub_text:
1604 if not text:
1605 text = sub_text
1606 continue
1607 text += sub_text
1608 return text
1609
1610 def _extract_comment(self, comment_renderer, parent=None):
1611 comment_id = comment_renderer.get('commentId')
1612 if not comment_id:
1613 return
1614 comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
1615 text = self._join_text_entries(comment_text_runs) or ''
1616 comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
1617 time_text = self._join_text_entries(comment_time_text)
d92f5d5a 1618 timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
a1c5d2ca
M
1619 author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
1620 author_id = try_get(comment_renderer,
1621 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
1622 votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
1623 lambda x: x['likeCount']), compat_str)) or 0
1624 author_thumbnail = try_get(comment_renderer,
1625 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
1626
1627 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
1628 is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
a1c5d2ca
M
1629 return {
1630 'id': comment_id,
1631 'text': text,
d92f5d5a 1632 'timestamp': timestamp,
a1c5d2ca
M
1633 'time_text': time_text,
1634 'like_count': votes,
1635 'is_favorited': is_liked,
1636 'author': author,
1637 'author_id': author_id,
1638 'author_thumbnail': author_thumbnail,
1639 'author_is_uploader': author_is_uploader,
1640 'parent': parent or 'root'
1641 }
1642
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generate the comments reachable from a comment continuation.

    root_continuation_data: renderer from which the first continuation is
        taken (via YoutubeTabIE._extract_continuation).
    identity_token, account_syncid, ytcfg: forwarded to
        _generate_api_headers for every request.
    session_token_list: one-element list; item 0 is sent as 'session_token'
        and is replaced in place whenever a response carries 'xsrf_token',
        so nested calls share the refreshed token.
    parent: id of the comment whose replies are being fetched, None for the
        top-level comments.
    comment_counts: shared list [comments so far, estimated total, current
        comment thread number]; created here when not supplied.

    Yields comment dicts (see _extract_comment) and, on the first top-level
    page, one int holding the estimated total number of comments.
    Raises ExtractorError when a page cannot be fetched after the configured
    'extractor_retries' or when YouTube reports an error.
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` must be the expected-type argument; inside the getter
            # tuple it would be called as a getter and hand back a copy of
            # the source mapping instead of the comment renderer.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    # Only the very first page of the top-level comments carries the header
    # (estimated count, sort menu).
    first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self.get_param('extractor_retries', 3)
        count = -1
        last_error = None
        response = None

        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                # Any other error, or retries exhausted
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    # NOTE(review): str_to_int presumably returns None for
                    # text it cannot convert; '%d' % None would raise, so
                    # only use the estimate when a number came back.
                    estimated_total = str_to_int(expected_comment_count)
                    if estimated_total is not None:
                        comment_counts[1] = estimated_total
                        self.to_screen('Downloading ~%d comments' % estimated_total)
                        yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
        else:
            # No known continuation renderer in this response: the
            # continuation was not advanced, so requesting it again would
            # loop forever on the same page.
            break
1827
1828 def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
1829 """Entry for comment extraction"""
1830 comments = []
1831 known_entry_comment_renderers = (
1832 'itemSectionRenderer',
1833 )
1834 estimated_total = 0
1835 for entry in contents:
1836 for key, renderer in entry.items():
1837 if key not in known_entry_comment_renderers:
1838 continue
1839
1840 comment_iter = self._comment_entries(
1841 renderer,
1842 identity_token=self._extract_identity_token(webpage, item_id=video_id),
1843 account_syncid=self._extract_account_syncid(ytcfg),
f4f751af 1844 ytcfg=ytcfg,
a1c5d2ca
M
1845 session_token_list=[xsrf_token])
1846
1847 for comment in comment_iter:
1848 if isinstance(comment, int):
1849 estimated_total = comment
1850 continue
1851 comments.append(comment)
1852 break
d92f5d5a 1853 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
a1c5d2ca
M
1854 return {
1855 'comments': comments,
1856 'comment_count': len(comments),
1857 }
1858
c5e8d7af 1859 def _real_extract(self, url):
cf7e015f 1860 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1861 video_id = self._match_id(url)
9297939e 1862
1863 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
1864
545cc85d 1865 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1866 webpage_url = base_url + 'watch?v=' + video_id
1867 webpage = self._download_webpage(
cce889b9 1868 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1869
9297939e 1870 def get_text(x):
1871 if not x:
1872 return
1873 text = x.get('simpleText')
1874 if text and isinstance(text, compat_str):
1875 return text
1876 runs = x.get('runs')
1877 if not isinstance(runs, list):
1878 return
1879 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
1880
1881 ytm_streaming_data = {}
1882 if is_music_url:
1883 # we are forcing to use parse_json because 141 only appeared in get_video_info.
1884 # el, c, cver, cplayer field required for 141(aac 256kbps) codec
1885 # maybe a parameter of the YouTube Music player?
1886 ytm_player_response = self._parse_json(try_get(compat_parse_qs(
1887 self._download_webpage(
1888 base_url + 'get_video_info', video_id,
fe03a6cd 1889 'Fetching youtube music info webpage',
1890 'unable to download youtube music info webpage', query={
9297939e 1891 'video_id': video_id,
1892 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1893 'el': 'detailpage',
1894 'c': 'WEB_REMIX',
1895 'cver': '0.1',
1896 'cplayer': 'UNIPLAYER'
1897 }, fatal=False)),
1898 lambda x: x['player_response'][0],
1899 compat_str) or '{}', video_id)
1900 ytm_streaming_data = ytm_player_response.get('streamingData') or {}
1901
545cc85d 1902 player_response = None
1903 if webpage:
1904 player_response = self._extract_yt_initial_variable(
1905 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1906 video_id, 'initial player response')
f4f751af 1907
1908 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1909 if not player_response:
1910 player_response = self._call_api(
f4f751af 1911 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1912
1913 playability_status = player_response.get('playabilityStatus') or {}
1914 if playability_status.get('reason') == 'Sign in to confirm your age':
1915 pr = self._parse_json(try_get(compat_parse_qs(
1916 self._download_webpage(
1917 base_url + 'get_video_info', video_id,
1918 'Refetching age-gated info webpage',
1919 'unable to download video info webpage', query={
1920 'video_id': video_id,
7c60c33e 1921 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1922 }, fatal=False)),
1923 lambda x: x['player_response'][0],
1924 compat_str) or '{}', video_id)
1925 if pr:
1926 player_response = pr
1927
1928 trailer_video_id = try_get(
1929 playability_status,
1930 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1931 compat_str)
1932 if trailer_video_id:
1933 return self.url_result(
1934 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1935
545cc85d 1936 search_meta = (
1937 lambda x: self._html_search_meta(x, webpage, default=None)) \
1938 if webpage else lambda x: None
dbdaaa23 1939
545cc85d 1940 video_details = player_response.get('videoDetails') or {}
37357d21 1941 microformat = try_get(
545cc85d 1942 player_response,
1943 lambda x: x['microformat']['playerMicroformatRenderer'],
1944 dict) or {}
1945 video_title = video_details.get('title') \
1946 or get_text(microformat.get('title')) \
1947 or search_meta(['og:title', 'twitter:title', 'title'])
1948 video_description = video_details.get('shortDescription')
cf7e015f 1949
8fe10494 1950 if not smuggled_data.get('force_singlefeed', False):
a06916d9 1951 if not self.get_param('noplaylist'):
8fe10494
S
1952 multifeed_metadata_list = try_get(
1953 player_response,
1954 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1955 compat_str)
8fe10494
S
1956 if multifeed_metadata_list:
1957 entries = []
1958 feed_ids = []
1959 for feed in multifeed_metadata_list.split(','):
1960 # Unquote should take place before split on comma (,) since textual
1961 # fields may contain comma as well (see
067aa17e 1962 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1963 feed_data = compat_parse_qs(
1964 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1965
1966 def feed_entry(name):
545cc85d 1967 return try_get(
1968 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1969
1970 feed_id = feed_entry('id')
1971 if not feed_id:
1972 continue
1973 feed_title = feed_entry('title')
1974 title = video_title
1975 if feed_title:
1976 title += ' (%s)' % feed_title
8fe10494
S
1977 entries.append({
1978 '_type': 'url_transparent',
1979 'ie_key': 'Youtube',
1980 'url': smuggle_url(
545cc85d 1981 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1982 {'force_singlefeed': True}),
6b09401b 1983 'title': title,
8fe10494 1984 })
6b09401b 1985 feed_ids.append(feed_id)
8fe10494
S
1986 self.to_screen(
1987 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1988 % (', '.join(feed_ids), video_id))
545cc85d 1989 return self.playlist_result(
1990 entries, video_id, video_title, video_description)
8fe10494
S
1991 else:
1992 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1993
9297939e 1994 formats, itags, stream_ids = [], [], []
cc2db878 1995 itag_qualities = {}
545cc85d 1996 player_url = None
dca3ff4a 1997 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
9297939e 1998
545cc85d 1999 streaming_data = player_response.get('streamingData') or {}
2000 streaming_formats = streaming_data.get('formats') or []
2001 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2002 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2003 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2004
545cc85d 2005 for fmt in streaming_formats:
2006 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2007 continue
321bf820 2008
cc2db878 2009 itag = str_or_none(fmt.get('itag'))
9297939e 2010 audio_track = fmt.get('audioTrack') or {}
2011 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2012 if stream_id in stream_ids:
2013 continue
2014
cc2db878 2015 quality = fmt.get('quality')
2016 if itag and quality:
2017 itag_qualities[itag] = quality
2018 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2019 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2020 # number of fragment that would subsequently requested with (`&sq=N`)
2021 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2022 continue
2023
545cc85d 2024 fmt_url = fmt.get('url')
2025 if not fmt_url:
2026 sc = compat_parse_qs(fmt.get('signatureCipher'))
2027 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2028 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2029 if not (sc and fmt_url and encrypted_sig):
2030 continue
2031 if not player_url:
2032 if not webpage:
2033 continue
2034 player_url = self._search_regex(
2035 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
2036 webpage, 'player URL', fatal=False)
2037 if not player_url:
201e9eaa 2038 continue
545cc85d 2039 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2040 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2041 fmt_url += '&' + sp + '=' + signature
2042
545cc85d 2043 if itag:
2044 itags.append(itag)
9297939e 2045 stream_ids.append(stream_id)
2046
cc2db878 2047 tbr = float_or_none(
2048 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2049 dct = {
2050 'asr': int_or_none(fmt.get('audioSampleRate')),
2051 'filesize': int_or_none(fmt.get('contentLength')),
2052 'format_id': itag,
0fb983f6 2053 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2054 'fps': int_or_none(fmt.get('fps')),
2055 'height': int_or_none(fmt.get('height')),
dca3ff4a 2056 'quality': q(quality),
cc2db878 2057 'tbr': tbr,
545cc85d 2058 'url': fmt_url,
2059 'width': fmt.get('width'),
0fb983f6 2060 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2061 }
2062 mimetype = fmt.get('mimeType')
2063 if mimetype:
2064 mobj = re.match(
2065 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2066 if mobj:
2067 dct['ext'] = mimetype2ext(mobj.group(1))
2068 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2069 no_audio = dct.get('acodec') == 'none'
2070 no_video = dct.get('vcodec') == 'none'
2071 if no_audio:
2072 dct['vbr'] = tbr
2073 if no_video:
2074 dct['abr'] = tbr
2075 if no_audio or no_video:
545cc85d 2076 dct['downloader_options'] = {
2077 # Youtube throttles chunks >~10M
2078 'http_chunk_size': 10485760,
bf1317d2 2079 }
7c60c33e 2080 if dct.get('ext'):
2081 dct['container'] = dct['ext'] + '_dash'
545cc85d 2082 formats.append(dct)
2083
9297939e 2084 for sd in (streaming_data, ytm_streaming_data):
2085 hls_manifest_url = sd.get('hlsManifestUrl')
2086 if hls_manifest_url:
2087 for f in self._extract_m3u8_formats(
2088 hls_manifest_url, video_id, 'mp4', fatal=False):
2089 itag = self._search_regex(
2090 r'/itag/(\d+)', f['url'], 'itag', default=None)
2091 if itag:
2092 f['format_id'] = itag
545cc85d 2093 formats.append(f)
2094
a06916d9 2095 if self.get_param('youtube_include_dash_manifest', True):
9297939e 2096 for sd in (streaming_data, ytm_streaming_data):
2097 dash_manifest_url = sd.get('dashManifestUrl')
2098 if dash_manifest_url:
2099 for f in self._extract_mpd_formats(
2100 dash_manifest_url, video_id, fatal=False):
2101 itag = f['format_id']
2102 if itag in itags:
2103 continue
2104 if itag in itag_qualities:
2105 # Not actually useful since the sorting is already done with "quality,res,fps,codec"
2106 # but kept to maintain feature parity (and code similarity) with youtube-dl
2107 # Remove if this causes any issues with sorting in future
2108 f['quality'] = q(itag_qualities[itag])
2109 filesize = int_or_none(self._search_regex(
2110 r'/clen/(\d+)', f.get('fragment_base_url')
2111 or f['url'], 'file size', default=None))
2112 if filesize:
2113 f['filesize'] = filesize
2114 formats.append(f)
bf1317d2 2115
545cc85d 2116 if not formats:
a06916d9 2117 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2118 self.raise_no_formats(
545cc85d 2119 'This video is DRM protected.', expected=True)
2120 pemr = try_get(
2121 playability_status,
2122 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2123 dict) or {}
2124 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2125 subreason = pemr.get('subreason')
2126 if subreason:
2127 subreason = clean_html(get_text(subreason))
2128 if subreason == 'The uploader has not made this video available in your country.':
2129 countries = microformat.get('availableCountries')
2130 if not countries:
2131 regions_allowed = search_meta('regionsAllowed')
2132 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2133 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2134 reason += '\n' + subreason
2135 if reason:
b7da73eb 2136 self.raise_no_formats(reason, expected=True)
bf1317d2 2137
545cc85d 2138 self._sort_formats(formats)
bf1317d2 2139
545cc85d 2140 keywords = video_details.get('keywords') or []
2141 if not keywords and webpage:
2142 keywords = [
2143 unescapeHTML(m.group('content'))
2144 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2145 for keyword in keywords:
2146 if keyword.startswith('yt:stretch='):
201c1459 2147 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2148 if mobj:
2149 # NB: float is intentional for forcing float division
2150 w, h = (float(v) for v in mobj.groups())
2151 if w > 0 and h > 0:
2152 ratio = w / h
2153 for f in formats:
2154 if f.get('vcodec') != 'none':
2155 f['stretched_ratio'] = ratio
2156 break
6449cd80 2157
545cc85d 2158 thumbnails = []
2159 for container in (video_details, microformat):
2160 for thumbnail in (try_get(
2161 container,
2162 lambda x: x['thumbnail']['thumbnails'], list) or []):
2163 thumbnail_url = thumbnail.get('url')
2164 if not thumbnail_url:
bf1317d2 2165 continue
1988fab7 2166 # Sometimes youtube gives a wrong thumbnail URL. See:
2167 # https://github.com/yt-dlp/yt-dlp/issues/233
2168 # https://github.com/ytdl-org/youtube-dl/issues/28023
2169 if 'maxresdefault' in thumbnail_url:
2170 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2171 thumbnails.append({
2172 'height': int_or_none(thumbnail.get('height')),
2173 'url': thumbnail_url,
2174 'width': int_or_none(thumbnail.get('width')),
2175 })
2176 if thumbnails:
2177 break
a6211d23 2178 else:
545cc85d 2179 thumbnail = search_meta(['og:image', 'twitter:image'])
2180 if thumbnail:
2181 thumbnails = [{'url': thumbnail}]
2182
2183 category = microformat.get('category') or search_meta('genre')
2184 channel_id = video_details.get('channelId') \
2185 or microformat.get('externalChannelId') \
2186 or search_meta('channelId')
2187 duration = int_or_none(
2188 video_details.get('lengthSeconds')
2189 or microformat.get('lengthSeconds')) \
2190 or parse_duration(search_meta('duration'))
2191 is_live = video_details.get('isLive')
2192 owner_profile_url = microformat.get('ownerProfileUrl')
2193
2194 info = {
2195 'id': video_id,
2196 'title': self._live_title(video_title) if is_live else video_title,
2197 'formats': formats,
2198 'thumbnails': thumbnails,
2199 'description': video_description,
2200 'upload_date': unified_strdate(
2201 microformat.get('uploadDate')
2202 or search_meta('uploadDate')),
2203 'uploader': video_details['author'],
2204 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2205 'uploader_url': owner_profile_url,
2206 'channel_id': channel_id,
2207 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2208 'duration': duration,
2209 'view_count': int_or_none(
2210 video_details.get('viewCount')
2211 or microformat.get('viewCount')
2212 or search_meta('interactionCount')),
2213 'average_rating': float_or_none(video_details.get('averageRating')),
2214 'age_limit': 18 if (
2215 microformat.get('isFamilySafe') is False
2216 or search_meta('isFamilyFriendly') == 'false'
2217 or search_meta('og:restrictions:age') == '18+') else 0,
2218 'webpage_url': webpage_url,
2219 'categories': [category] if category else None,
2220 'tags': keywords,
2221 'is_live': is_live,
2222 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2223 'was_live': video_details.get('isLiveContent'),
545cc85d 2224 }
b477fc13 2225
545cc85d 2226 pctr = try_get(
2227 player_response,
2228 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2229 subtitles = {}
2230 if pctr:
774d79cc 2231 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2232 lang_subs = container.setdefault(lang_code, [])
545cc85d 2233 for fmt in self._SUBTITLE_FORMATS:
2234 query.update({
2235 'fmt': fmt,
2236 })
2237 lang_subs.append({
2238 'ext': fmt,
2239 'url': update_url_query(base_url, query),
774d79cc 2240 'name': sub_name,
545cc85d 2241 })
7e72694b 2242
545cc85d 2243 for caption_track in (pctr.get('captionTracks') or []):
2244 base_url = caption_track.get('baseUrl')
2245 if not base_url:
2246 continue
2247 if caption_track.get('kind') != 'asr':
120916da 2248 lang_code = (
2249 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2250 or caption_track.get('languageCode'))
545cc85d 2251 if not lang_code:
2252 continue
2253 process_language(
774d79cc 2254 subtitles, base_url, lang_code,
2255 try_get(caption_track, lambda x: x.get('name').get('simpleText')),
2256 {})
545cc85d 2257 continue
2258 automatic_captions = {}
2259 for translation_language in (pctr.get('translationLanguages') or []):
2260 translation_language_code = translation_language.get('languageCode')
2261 if not translation_language_code:
2262 continue
2263 process_language(
2264 automatic_captions, base_url, translation_language_code,
774d79cc 2265 try_get(translation_language, lambda x: x['languageName']['simpleText']),
545cc85d 2266 {'tlang': translation_language_code})
2267 info['automatic_captions'] = automatic_captions
2268 info['subtitles'] = subtitles
7e72694b 2269
545cc85d 2270 parsed_url = compat_urllib_parse_urlparse(url)
2271 for component in [parsed_url.fragment, parsed_url.query]:
2272 query = compat_parse_qs(component)
2273 for k, v in query.items():
2274 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2275 d_k += '_time'
2276 if d_k not in info and k in s_ks:
2277 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2278
2279 # Youtube Music Auto-generated description
822b9d9c 2280 if video_description:
38d70284 2281 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2282 if mobj:
822b9d9c
RA
2283 release_year = mobj.group('release_year')
2284 release_date = mobj.group('release_date')
2285 if release_date:
2286 release_date = release_date.replace('-', '')
2287 if not release_year:
545cc85d 2288 release_year = release_date[:4]
2289 info.update({
2290 'album': mobj.group('album'.strip()),
2291 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2292 'track': mobj.group('track').strip(),
2293 'release_date': release_date,
cc2db878 2294 'release_year': int_or_none(release_year),
545cc85d 2295 })
7e72694b 2296
545cc85d 2297 initial_data = None
2298 if webpage:
2299 initial_data = self._extract_yt_initial_variable(
2300 webpage, self._YT_INITIAL_DATA_RE, video_id,
2301 'yt initial data')
2302 if not initial_data:
2303 initial_data = self._call_api(
f4f751af 2304 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2305
2306 if not is_live:
2307 try:
2308 # This will error if there is no livechat
2309 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2310 info['subtitles']['live_chat'] = [{
394dcd44 2311 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2312 'video_id': video_id,
2313 'ext': 'json',
2314 'protocol': 'youtube_live_chat_replay',
2315 }]
2316 except (KeyError, IndexError, TypeError):
2317 pass
2318
2319 if initial_data:
2320 chapters = self._extract_chapters_from_json(
2321 initial_data, video_id, duration)
2322 if not chapters:
2323 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2324 contents = try_get(
2325 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2326 list)
2327 if not contents:
2328 continue
2329
2330 def chapter_time(mmlir):
2331 return parse_duration(
2332 get_text(mmlir.get('timeDescription')))
2333
2334 chapters = []
2335 for next_num, content in enumerate(contents, start=1):
2336 mmlir = content.get('macroMarkersListItemRenderer') or {}
2337 start_time = chapter_time(mmlir)
2338 end_time = chapter_time(try_get(
2339 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2340 if next_num < len(contents) else duration
2341 if start_time is None or end_time is None:
2342 continue
2343 chapters.append({
2344 'start_time': start_time,
2345 'end_time': end_time,
2346 'title': get_text(mmlir.get('title')),
2347 })
2348 if chapters:
2349 break
2350 if chapters:
2351 info['chapters'] = chapters
2352
2353 contents = try_get(
2354 initial_data,
2355 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2356 list) or []
2357 for content in contents:
2358 vpir = content.get('videoPrimaryInfoRenderer')
2359 if vpir:
2360 stl = vpir.get('superTitleLink')
2361 if stl:
2362 stl = get_text(stl)
2363 if try_get(
2364 vpir,
2365 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2366 info['location'] = stl
2367 else:
2368 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2369 if mobj:
2370 info.update({
2371 'series': mobj.group(1),
2372 'season_number': int(mobj.group(2)),
2373 'episode_number': int(mobj.group(3)),
2374 })
2375 for tlb in (try_get(
2376 vpir,
2377 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2378 list) or []):
2379 tbr = tlb.get('toggleButtonRenderer') or {}
2380 for getter, regex in [(
2381 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2382 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2383 lambda x: x['accessibility'],
2384 lambda x: x['accessibilityData']['accessibilityData'],
2385 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2386 label = (try_get(tbr, getter, dict) or {}).get('label')
2387 if label:
2388 mobj = re.match(regex, label)
2389 if mobj:
2390 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2391 break
2392 sbr_tooltip = try_get(
2393 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2394 if sbr_tooltip:
2395 like_count, dislike_count = sbr_tooltip.split(' / ')
2396 info.update({
2397 'like_count': str_to_int(like_count),
2398 'dislike_count': str_to_int(dislike_count),
2399 })
2400 vsir = content.get('videoSecondaryInfoRenderer')
2401 if vsir:
2402 info['channel'] = get_text(try_get(
2403 vsir,
2404 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2405 dict))
545cc85d 2406 rows = try_get(
2407 vsir,
2408 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2409 list) or []
2410 multiple_songs = False
2411 for row in rows:
2412 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2413 multiple_songs = True
2414 break
2415 for row in rows:
2416 mrr = row.get('metadataRowRenderer') or {}
2417 mrr_title = mrr.get('title')
2418 if not mrr_title:
2419 continue
2420 mrr_title = get_text(mrr['title'])
2421 mrr_contents_text = get_text(mrr['contents'][0])
2422 if mrr_title == 'License':
2423 info['license'] = mrr_contents_text
2424 elif not multiple_songs:
2425 if mrr_title == 'Album':
2426 info['album'] = mrr_contents_text
2427 elif mrr_title == 'Artist':
2428 info['artist'] = mrr_contents_text
2429 elif mrr_title == 'Song':
2430 info['track'] = mrr_contents_text
2431
2432 fallbacks = {
2433 'channel': 'uploader',
2434 'channel_id': 'uploader_id',
2435 'channel_url': 'uploader_url',
2436 }
2437 for to, frm in fallbacks.items():
2438 if not info.get(to):
2439 info[to] = info.get(frm)
2440
2441 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2442 v = info.get(s_k)
2443 if v:
2444 info[d_k] = v
b84071c0 2445
c224251a
M
2446 is_private = bool_or_none(video_details.get('isPrivate'))
2447 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2448 is_membersonly = None
b28f8d24 2449 is_premium = None
c224251a
M
2450 if initial_data and is_private is not None:
2451 is_membersonly = False
b28f8d24 2452 is_premium = False
c224251a
M
2453 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2454 for content in contents or []:
2455 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2456 for badge in badges or []:
2457 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2458 if label.lower() == 'members only':
2459 is_membersonly = True
2460 break
b28f8d24
M
2461 elif label.lower() == 'premium':
2462 is_premium = True
2463 break
2464 if is_membersonly or is_premium:
c224251a
M
2465 break
2466
2467 # TODO: Add this for playlists
2468 info['availability'] = self._availability(
2469 is_private=is_private,
b28f8d24 2470 needs_premium=is_premium,
c224251a
M
2471 needs_subscription=is_membersonly,
2472 needs_auth=info['age_limit'] >= 18,
2473 is_unlisted=None if is_private is None else is_unlisted)
2474
06167fbb 2475 # get xsrf for annotations or comments
a06916d9 2476 get_annotations = self.get_param('writeannotations', False)
2477 get_comments = self.get_param('getcomments', False)
06167fbb 2478 if get_annotations or get_comments:
29f7c58a 2479 xsrf_token = None
545cc85d 2480 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2481 if ytcfg:
2482 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2483 if not xsrf_token:
2484 xsrf_token = self._search_regex(
2485 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2486 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2487
2488 # annotations
06167fbb 2489 if get_annotations:
64b6a4e9
RA
2490 invideo_url = try_get(
2491 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2492 if xsrf_token and invideo_url:
29f7c58a 2493 xsrf_field_name = None
2494 if ytcfg:
2495 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2496 if not xsrf_field_name:
2497 xsrf_field_name = self._search_regex(
2498 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2499 webpage, 'xsrf field name',
29f7c58a 2500 group='xsrf_field_name', default='session_token')
8a784c74 2501 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2502 self._proto_relative_url(invideo_url),
2503 video_id, note='Downloading annotations',
2504 errnote='Unable to download video annotations', fatal=False,
2505 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2506
277d6ff5 2507 if get_comments:
a1c5d2ca 2508 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2509
545cc85d 2510 self.mark_watched(video_id, player_response)
d77ab8e2 2511
545cc85d 2512 return info
c5e8d7af 2513
5f6a1245 2514
8bdd16b4 2515class YoutubeTabIE(YoutubeBaseInfoExtractor):
2516 IE_DESC = 'YouTube.com tab'
70d5c17b 2517 _VALID_URL = r'''(?x)
2518 https?://
2519 (?:\w+\.)?
2520 (?:
2521 youtube(?:kids)?\.com|
2522 invidio\.us
2523 )/
2524 (?:
fe03a6cd 2525 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 2526 (?P<not_channel>
9ba5705a 2527 feed/|hashtag/|
70d5c17b 2528 (?:playlist|watch)\?.*?\blist=
2529 )|
29f7c58a 2530 (?!(?:%s)\b) # Direct URLs
70d5c17b 2531 )
2532 (?P<id>[^/?\#&]+)
2533 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2534 IE_NAME = 'youtube:tab'
2535
81127aa5 2536 _TESTS = [{
da692b79 2537 'note': 'playlists, multipage',
8bdd16b4 2538 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2539 'playlist_mincount': 94,
2540 'info_dict': {
2541 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2542 'title': 'Игорь Клейнер - Playlists',
2543 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2544 'uploader': 'Игорь Клейнер',
2545 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2546 },
2547 }, {
da692b79 2548 'note': 'playlists, multipage, different order',
8bdd16b4 2549 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2550 'playlist_mincount': 94,
2551 'info_dict': {
2552 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2553 'title': 'Игорь Клейнер - Playlists',
2554 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2555 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2556 'uploader': 'Игорь Клейнер',
8bdd16b4 2557 },
201c1459 2558 }, {
da692b79 2559 'note': 'playlists, series',
201c1459 2560 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2561 'playlist_mincount': 5,
2562 'info_dict': {
2563 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2564 'title': '3Blue1Brown - Playlists',
2565 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 2566 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2567 'uploader': '3Blue1Brown',
201c1459 2568 },
8bdd16b4 2569 }, {
da692b79 2570 'note': 'playlists, singlepage',
8bdd16b4 2571 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2572 'playlist_mincount': 4,
2573 'info_dict': {
2574 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2575 'title': 'ThirstForScience - Playlists',
2576 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2577 'uploader': 'ThirstForScience',
2578 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2579 }
2580 }, {
2581 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2582 'only_matching': True,
2583 }, {
da692b79 2584 'note': 'basic, single video playlist',
0e30a7b9 2585 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2586 'info_dict': {
0e30a7b9 2587 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2588 'uploader': 'Sergey M.',
2589 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2590 'title': 'youtube-dl public playlist',
81127aa5 2591 },
0e30a7b9 2592 'playlist_count': 1,
9291475f 2593 }, {
da692b79 2594 'note': 'empty playlist',
0e30a7b9 2595 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2596 'info_dict': {
0e30a7b9 2597 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2598 'uploader': 'Sergey M.',
2599 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2600 'title': 'youtube-dl empty playlist',
9291475f
PH
2601 },
2602 'playlist_count': 0,
2603 }, {
da692b79 2604 'note': 'Home tab',
8bdd16b4 2605 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2606 'info_dict': {
8bdd16b4 2607 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2608 'title': 'lex will - Home',
2609 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2610 'uploader': 'lex will',
2611 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2612 },
8bdd16b4 2613 'playlist_mincount': 2,
9291475f 2614 }, {
da692b79 2615 'note': 'Videos tab',
8bdd16b4 2616 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2617 'info_dict': {
8bdd16b4 2618 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2619 'title': 'lex will - Videos',
2620 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2621 'uploader': 'lex will',
2622 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2623 },
8bdd16b4 2624 'playlist_mincount': 975,
9291475f 2625 }, {
da692b79 2626 'note': 'Videos tab, sorted by popular',
8bdd16b4 2627 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2628 'info_dict': {
8bdd16b4 2629 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2630 'title': 'lex will - Videos',
2631 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2632 'uploader': 'lex will',
2633 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2634 },
8bdd16b4 2635 'playlist_mincount': 199,
9291475f 2636 }, {
da692b79 2637 'note': 'Playlists tab',
8bdd16b4 2638 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2639 'info_dict': {
8bdd16b4 2640 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2641 'title': 'lex will - Playlists',
2642 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2643 'uploader': 'lex will',
2644 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2645 },
8bdd16b4 2646 'playlist_mincount': 17,
ac7553d0 2647 }, {
da692b79 2648 'note': 'Community tab',
8bdd16b4 2649 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2650 'info_dict': {
8bdd16b4 2651 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2652 'title': 'lex will - Community',
2653 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2654 'uploader': 'lex will',
2655 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2656 },
2657 'playlist_mincount': 18,
87dadd45 2658 }, {
da692b79 2659 'note': 'Channels tab',
8bdd16b4 2660 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2661 'info_dict': {
8bdd16b4 2662 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2663 'title': 'lex will - Channels',
2664 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2665 'uploader': 'lex will',
2666 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2667 },
deaec5af 2668 'playlist_mincount': 12,
cd684175 2669 }, {
2670 'note': 'Search tab',
2671 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
2672 'playlist_mincount': 40,
2673 'info_dict': {
2674 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2675 'title': '3Blue1Brown - Search - linear algebra',
2676 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2677 'uploader': '3Blue1Brown',
2678 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2679 },
6b08cdf6 2680 }, {
a0566bbf 2681 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2682 'only_matching': True,
2683 }, {
a0566bbf 2684 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2685 'only_matching': True,
2686 }, {
a0566bbf 2687 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2688 'only_matching': True,
2689 }, {
2690 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2691 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2692 'info_dict': {
2693 'title': '29C3: Not my department',
2694 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2695 'uploader': 'Christiaan008',
2696 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2697 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2698 },
2699 'playlist_count': 96,
2700 }, {
2701 'note': 'Large playlist',
2702 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2703 'info_dict': {
8bdd16b4 2704 'title': 'Uploads from Cauchemar',
2705 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2706 'uploader': 'Cauchemar',
2707 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2708 },
8bdd16b4 2709 'playlist_mincount': 1123,
2710 }, {
da692b79 2711 'note': 'even larger playlist, 8832 videos',
8bdd16b4 2712 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2713 'only_matching': True,
4b7df0d3
JMF
2714 }, {
2715 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2716 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2717 'info_dict': {
acf757f4
PH
2718 'title': 'Uploads from Interstellar Movie',
2719 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2720 'uploader': 'Interstellar Movie',
8bdd16b4 2721 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2722 },
481cc733 2723 'playlist_mincount': 21,
358de58c 2724 }, {
2725 'note': 'Playlist with "show unavailable videos" button',
2726 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2727 'info_dict': {
2728 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2729 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2730 'uploader': 'Phim Siêu Nhân Nhật Bản',
2731 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2732 },
da692b79 2733 'playlist_mincount': 200,
5d342002 2734 }, {
da692b79 2735 'note': 'Playlist with unavailable videos in page 7',
5d342002 2736 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2737 'info_dict': {
2738 'title': 'Uploads from BlankTV',
2739 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2740 'uploader': 'BlankTV',
2741 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2742 },
da692b79 2743 'playlist_mincount': 1000,
8bdd16b4 2744 }, {
da692b79 2745 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 2746 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2747 'info_dict': {
2748 'title': 'Data Analysis with Dr Mike Pound',
2749 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2750 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2751 'uploader': 'Computerphile',
deaec5af 2752 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2753 },
2754 'playlist_mincount': 11,
2755 }, {
a0566bbf 2756 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2757 'only_matching': True,
dacb3a86 2758 }, {
da692b79 2759 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
2760 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2761 'info_dict': {
2762 'id': 'FqZTN594JQw',
2763 'ext': 'webm',
2764 'title': "Smiley's People 01 detective, Adventure Series, Action",
2765 'uploader': 'STREEM',
2766 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2767 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2768 'upload_date': '20150526',
2769 'license': 'Standard YouTube License',
2770 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2771 'categories': ['People & Blogs'],
2772 'tags': list,
dbdaaa23 2773 'view_count': int,
dacb3a86
S
2774 'like_count': int,
2775 'dislike_count': int,
2776 },
2777 'params': {
2778 'skip_download': True,
2779 },
13a75688 2780 'skip': 'This video is not available.',
dacb3a86 2781 'add_ie': [YoutubeIE.ie_key()],
481cc733 2782 }, {
8bdd16b4 2783 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2784 'only_matching': True,
66b48727 2785 }, {
8bdd16b4 2786 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2787 'only_matching': True,
a0566bbf 2788 }, {
2789 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2790 'info_dict': {
da692b79 2791 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 2792 'ext': 'mp4',
deaec5af 2793 'title': compat_str,
a0566bbf 2794 'uploader': 'Sky News',
2795 'uploader_id': 'skynews',
2796 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 2797 'upload_date': r're:\d{8}',
2798 'description': compat_str,
a0566bbf 2799 'categories': ['News & Politics'],
2800 'tags': list,
2801 'like_count': int,
2802 'dislike_count': int,
2803 },
2804 'params': {
2805 'skip_download': True,
2806 },
da692b79 2807 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 2808 }, {
2809 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2810 'info_dict': {
2811 'id': 'a48o2S1cPoo',
2812 'ext': 'mp4',
2813 'title': 'The Young Turks - Live Main Show',
2814 'uploader': 'The Young Turks',
2815 'uploader_id': 'TheYoungTurks',
2816 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2817 'upload_date': '20150715',
2818 'license': 'Standard YouTube License',
2819 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2820 'categories': ['News & Politics'],
2821 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2822 'like_count': int,
2823 'dislike_count': int,
2824 },
2825 'params': {
2826 'skip_download': True,
2827 },
2828 'only_matching': True,
2829 }, {
2830 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2831 'only_matching': True,
2832 }, {
2833 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2834 'only_matching': True,
3d3dddc9 2835 }, {
2836 'url': 'https://www.youtube.com/feed/trending',
2837 'only_matching': True,
2838 }, {
3d3dddc9 2839 'url': 'https://www.youtube.com/feed/library',
2840 'only_matching': True,
2841 }, {
3d3dddc9 2842 'url': 'https://www.youtube.com/feed/history',
2843 'only_matching': True,
2844 }, {
3d3dddc9 2845 'url': 'https://www.youtube.com/feed/subscriptions',
2846 'only_matching': True,
2847 }, {
3d3dddc9 2848 'url': 'https://www.youtube.com/feed/watch_later',
2849 'only_matching': True,
2850 }, {
da692b79 2851 'note': 'Recommended - redirects to home page',
3d3dddc9 2852 'url': 'https://www.youtube.com/feed/recommended',
2853 'only_matching': True,
29f7c58a 2854 }, {
da692b79 2855 'note': 'inline playlist with not always working continuations',
29f7c58a 2856 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2857 'only_matching': True,
2858 }, {
2859 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2860 'only_matching': True,
2861 }, {
2862 'url': 'https://www.youtube.com/course',
2863 'only_matching': True,
2864 }, {
2865 'url': 'https://www.youtube.com/zsecurity',
2866 'only_matching': True,
2867 }, {
2868 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2869 'only_matching': True,
2870 }, {
2871 'url': 'https://www.youtube.com/TheYoungTurks/live',
2872 'only_matching': True,
39ed931e 2873 }, {
2874 'url': 'https://www.youtube.com/hashtag/cctv9',
2875 'info_dict': {
2876 'id': 'cctv9',
2877 'title': '#cctv9',
2878 },
2879 'playlist_mincount': 350,
201c1459 2880 }, {
2881 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2882 'only_matching': True,
9297939e 2883 }, {
da692b79 2884 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 2885 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2886 'only_matching': True
fe03a6cd 2887 }, {
2888 'note': '/browse/ should redirect to /channel/',
2889 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
2890 'only_matching': True
2891 }, {
2892 'note': 'VLPL, should redirect to playlist?list=PL...',
2893 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2894 'info_dict': {
2895 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2896 'uploader': 'NoCopyrightSounds',
2897 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
2898 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
2899 'title': 'NCS Releases',
2900 },
2901 'playlist_mincount': 166,
18db7548 2902 }, {
2903 'note': 'Topic, should redirect to playlist?list=UU...',
2904 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
2905 'info_dict': {
2906 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
2907 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
2908 'title': 'Uploads from Royalty Free Music - Topic',
2909 'uploader': 'Royalty Free Music - Topic',
2910 },
2911 'expected_warnings': [
2912 'A channel/user page was given',
2913 'The URL does not have a videos tab',
2914 ],
2915 'playlist_mincount': 101,
2916 }, {
2917 'note': 'Topic without a UU playlist',
2918 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
2919 'info_dict': {
2920 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
2921 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
2922 },
2923 'expected_warnings': [
2924 'A channel/user page was given',
2925 'The URL does not have a videos tab',
2926 'Falling back to channel URL',
2927 ],
2928 'playlist_mincount': 9,
29f7c58a 2929 }]
2930
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    Any URL accepted by YoutubeIE is rejected here; for every other URL the
    decision is delegated to the parent class's suitable().
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2935
2936 def _extract_channel_id(self, webpage):
2937 channel_id = self._html_search_meta(
2938 'channelId', webpage, 'channel id', default=None)
2939 if channel_id:
2940 return channel_id
2941 channel_url = self._html_search_meta(
2942 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2943 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2944 'twitter:app:url:googleplay'), webpage, 'channel url')
2945 return self._search_regex(
2946 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2947 channel_url, 'channel id')
15f6397c 2948
8bdd16b4 2949 @staticmethod
cd7c66cf 2950 def _extract_basic_item_renderer(item):
2951 # Modified from _extract_grid_item_renderer
201c1459 2952 known_basic_renderers = (
2953 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 2954 )
2955 for key, renderer in item.items():
201c1459 2956 if not isinstance(renderer, dict):
cd7c66cf 2957 continue
201c1459 2958 elif key in known_basic_renderers:
2959 return renderer
2960 elif key.startswith('grid') and key.endswith('Renderer'):
2961 return renderer
8bdd16b4 2962
8bdd16b4 2963 def _grid_entries(self, grid_renderer):
2964 for item in grid_renderer['items']:
2965 if not isinstance(item, dict):
39b62db1 2966 continue
cd7c66cf 2967 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 2968 if not isinstance(renderer, dict):
2969 continue
2970 title = try_get(
201c1459 2971 renderer, (lambda x: x['title']['runs'][0]['text'],
2972 lambda x: x['title']['simpleText']), compat_str)
8bdd16b4 2973 # playlist
2974 playlist_id = renderer.get('playlistId')
2975 if playlist_id:
2976 yield self.url_result(
2977 'https://www.youtube.com/playlist?list=%s' % playlist_id,
2978 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2979 video_title=title)
201c1459 2980 continue
8bdd16b4 2981 # video
2982 video_id = renderer.get('videoId')
2983 if video_id:
2984 yield self._extract_video(renderer)
201c1459 2985 continue
8bdd16b4 2986 # channel
2987 channel_id = renderer.get('channelId')
2988 if channel_id:
2989 title = try_get(
2990 renderer, lambda x: x['title']['simpleText'], compat_str)
2991 yield self.url_result(
2992 'https://www.youtube.com/channel/%s' % channel_id,
2993 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 2994 continue
2995 # generic endpoint URL support
2996 ep_url = urljoin('https://www.youtube.com/', try_get(
2997 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
2998 compat_str))
2999 if ep_url:
3000 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3001 if ie.suitable(ep_url):
3002 yield self.url_result(
3003 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3004 break
8bdd16b4 3005
3d3dddc9 3006 def _shelf_entries_from_content(self, shelf_renderer):
3007 content = shelf_renderer.get('content')
3008 if not isinstance(content, dict):
8bdd16b4 3009 return
cd7c66cf 3010 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3011 if renderer:
3012 # TODO: add support for nested playlists so each shelf is processed
3013 # as separate playlist
3014 # TODO: this includes only first N items
3015 for entry in self._grid_entries(renderer):
3016 yield entry
3017 renderer = content.get('horizontalListRenderer')
3018 if renderer:
3019 # TODO
3020 pass
8bdd16b4 3021
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the results for one shelf.

    When the shelf endpoint resolves to a URL, a url_result for it (titled
    with the shelf's first title run) is yielded first.  The entries
    embedded in the shelf content are then yielded as well.  With
    *skip_channels* set, a shelf whose URL contains '/channels?' yields
    nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels; note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = '/channels?' in shelf_url
        if skip_channels and links_to_channels:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 3039
8bdd16b4 3040 def _playlist_entries(self, video_list_renderer):
3041 for content in video_list_renderer['contents']:
3042 if not isinstance(content, dict):
3043 continue
3044 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3045 if not isinstance(renderer, dict):
3046 continue
3047 video_id = renderer.get('videoId')
3048 if not video_id:
3049 continue
3050 yield self._extract_video(renderer)
07aeced6 3051
def _rich_entries(self, rich_grid_renderer):
    """Yield the extracted video of a rich item, if it carries one.

    The video renderer is read from ['content']['videoRenderer']; nothing is
    yielded when it is missing or has no 'videoId'.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3059
8bdd16b4 3060 def _video_entry(self, video_renderer):
3061 video_id = video_renderer.get('videoId')
3062 if video_id:
3063 return self._extract_video(video_renderer)
dacb3a86 3064
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by one community post.

    Looks, in this order, at the post's video attachment, its playlist
    attachment, and the URL links in its text runs that YoutubeIE accepts.
    A linked video whose ID equals the attached video's ID is not yielded
    again.  Nothing is yielded when the post has no backstagePostRenderer.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'],
        compat_str)
    if playlist_id:
        playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        yield self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    text_runs = try_get(post, lambda x: x['contentText']['runs'], list) or []
    for text_run in text_runs:
        if not isinstance(text_run, dict):
            continue
        link_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'],
            compat_str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        linked_video_id = YoutubeIE._match_id(link_url)
        if linked_video_id != video_id:
            yield self.url_result(
                link_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 3100
8bdd16b4 3101 def _post_thread_continuation_entries(self, post_thread_continuation):
3102 contents = post_thread_continuation.get('contents')
3103 if not isinstance(contents, list):
3104 return
3105 for content in contents:
3106 renderer = content.get('backstagePostThreadRenderer')
3107 if not isinstance(renderer, dict):
3108 continue
3109 for entry in self._post_thread_entries(renderer):
3110 yield entry
07aeced6 3111
39ed931e 3112 r''' # unused
3113 def _rich_grid_entries(self, contents):
3114 for content in contents:
3115 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3116 if video_renderer:
3117 entry = self._video_entry(video_renderer)
3118 if entry:
3119 yield entry
3120 '''
3121
29f7c58a 3122 @staticmethod
3123 def _build_continuation_query(continuation, ctp=None):
3124 query = {
3125 'ctoken': continuation,
3126 'continuation': continuation,
3127 }
3128 if ctp:
3129 query['itct'] = ctp
3130 return query
3131
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from nextContinuationData.

    Reads renderer['continuations'][0]['nextContinuationData'] and returns
    None when that dict or its 'continuation' token is missing or empty.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = data.get('continuation') if data else None
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, data.get('clickTrackingParams'))
c5e8d7af 3143
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    The nextContinuationData form is tried first.  Failing that, the
    renderer's 'contents' and then 'items' lists are scanned for the first
    continuationItemRenderer whose endpoint carries a continuation token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate,
            lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 3166
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of *tab*, following continuations page by page.

    The entries embedded in the tab's ``sectionListRenderer`` or
    ``richGridRenderer`` are yielded first. While a continuation is known,
    further pages are requested through ``self._extract_response`` and
    dispatched to the matching entry extractor. Iteration stops when no
    continuation is left, a request returns nothing, or a page contains no
    recognised renderer.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Yields entries and records the continuation it finds in
        # continuation_list[0] (shared with the enclosing function).
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first recognised renderer of each item is used
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # 'itct' is only present when click tracking params were found, so
        # it must not be indexed unconditionally
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh slot so extract_entries can report this page's continuation
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Maps the key of the first continuation item to (extractor, name of
        # the list the extractor expects the items under)
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3286
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose 'selected' flag is True.

    Both 'tabRenderer' and 'expandableTabRenderer' are considered.
    Raises ExtractorError when no tab is selected.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3295
@staticmethod
def _extract_uploader(data):
    """Collect uploader details from the playlist sidebar.

    Returns a dict that may contain 'uploader', 'uploader_id' and
    'uploader_url'; keys whose value is None are left out. When several
    sidebar items name an owner, the last one wins.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        info['uploader'] = owner.get('text')
        info['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        info['uploader_url'] = urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
    return {key: value for key, value in info.items() if value is not None}
8bdd16b4 3318
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a page made of tabs.

    Metadata is read from ``channelMetadataRenderer`` or, failing that,
    ``playlistMetadataRenderer``. The entries come from ``self._entries``
    applied to the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    thumbnails_list = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Stays None for playlists; the item id is substituted below
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in thumbnails_list:
        if not isinstance(thumb, dict):
            continue
        thumbnail_url = url_or_none(thumb.get('url'))
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata: take the uploader from the sidebar instead
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the uploader_* fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id,
            self._extract_identity_token(webpage, item_id),
            self._extract_account_syncid(data),
            self._extract_ytcfg(item_id, webpage)),
        **metadata)
73c4ac2c 3391
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages as needed.

    Each page is the playlist panel returned by the 'next' endpoint for the
    last video seen. Iteration ends when a page is empty, contains nothing
    after the last yielded video, or reaches the first video again.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video already yielded (0 if not found)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The endpoint may be missing; fall back to the defaults below
        # instead of calling .get on None
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Nothing left to iterate over
            return
cd7c66cf 3430
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a watch page that carries a playlist panel.

    When the panel links to a playlist URL different from *url*, extraction
    is delegated to that URL. Otherwise the panel is treated as a mix and
    extracted through ``self._extract_mix_playlist``.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    if playlist_url and playlist_url != url:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=title)

    mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
    return self.playlist_result(
        mix_entries, playlist_id=playlist_id, playlist_title=title)
c5e8d7af 3448
95c01b6c 3449 @staticmethod
3450 def _extract_alerts(data):
3451 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
3452 if not isinstance(alert_dict, dict):
3453 continue
3454 for alert in alert_dict.values():
3455 alert_type = alert.get('type')
3456 if not alert_type:
02ced43c 3457 continue
95c01b6c 3458 message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
3459 if message:
3460 yield alert_type, message
3461 for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
3462 message += try_get(run, lambda x: x['text'], compat_str)
3463 if message:
3464 yield alert_type, message
3465
3466 def _report_alerts(self, alerts, expected=True):
3ffc7c89 3467 errors = []
3468 warnings = []
95c01b6c 3469 for alert_type, alert_message in alerts:
f3eaa8dd 3470 if alert_type.lower() == 'error':
3ffc7c89 3471 errors.append([alert_type, alert_message])
f3eaa8dd 3472 else:
3ffc7c89 3473 warnings.append([alert_type, alert_message])
f3eaa8dd 3474
3ffc7c89 3475 for alert_type, alert_message in (warnings + errors[:-1]):
6a39ee13 3476 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
3ffc7c89 3477 if errors:
3478 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3479
95c01b6c 3480 def _extract_and_report_alerts(self, data, *args, **kwargs):
3481 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
3482
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again so that unavailable videos are included.

    The browse id and params are taken from the sidebar's
    'show unavailable videos' menu item when it exists; otherwise the
    defaults 'VL<item_id>' and 'wgYCCAA=' are used. Returns None when the
    page has no playlist sidebar; the request itself is non-fatal.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return

    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        primary_info = sidebar_item.get('playlistSidebarPrimaryInfoRenderer')
        menu_items = try_get(
            primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if not isinstance(menu_item, dict):
                continue
            nav_item = menu_item.get('menuNavigationItemRenderer')
            label = try_get(
                nav_item, lambda x: x['text']['simpleText'], compat_str)
            if label and label.lower() == 'show unavailable videos':
                endpoint = try_get(
                    nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
                browse_id = endpoint.get('browseId')
                params = endpoint.get('params')
                # Only leaves the menu loop; later sidebar items are still scanned
                break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3526
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint *ep*, retrying on transient failures.

    A call is retried, up to the 'extractor_retries' param (default 3),
    when it fails with HTTP 500, 503 or 404, or when the response contains
    none of *check_get_keys*. Alerts in a response are reported with
    expected=False. Once retries are exhausted, or on any other
    ExtractorError, the error is raised if *fatal*; otherwise a warning is
    reported and None is returned.
    """
    max_retries = self.get_param('extractor_retries', 3)
    required_keys = [] if check_get_keys is None else check_get_keys
    response = None
    failure = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        if failure:
            self.report_warning('%s. Retrying ...' % failure)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, retry_suffix))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                failure = 'HTTP Error %s' % e.cause.code
                if attempt < max_retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        # Youtube may send alerts if there was an issue with the continuation page
        self._extract_and_report_alerts(response, expected=False)
        if not required_keys or dict_get(response, required_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        failure = 'Incomplete data received'
        if attempt >= max_retries:
            if fatal:
                raise ExtractorError(failure)
            self.report_warning(failure)
            return
    return response
3574
def _extract_webpage(self, url, item_id):
    """Download *url* and return ``(webpage, initial_data)``.

    The download is repeated, up to the 'extractor_retries' param
    (default 3), while the initial data has neither 'contents' nor
    'currentVideoEndpoint'. Alerts are reported for each incomplete page,
    and ExtractorError is raised once the retries are exhausted.
    """
    max_retries = self.get_param('extractor_retries', 3)
    failure = 'Incomplete yt initial data recieved'
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if attempt:
            self.report_warning('%s. Retrying ...' % failure)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        webpage = self._download_webpage(
            url, item_id, 'Downloading webpage%s' % retry_suffix)
        data = self._extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if attempt >= max_retries:
            raise ExtractorError(failure)
    return webpage, data
3596
9297939e 3597 @staticmethod
3598 def _smuggle_data(entries, data):
3599 for entry in entries:
3600 if data:
3601 entry['url'] = smuggle_url(entry['url'], data)
3602 yield entry
3603
def _real_extract(self, url):
    """Extract *url* and pass the smuggled data on to every resulting entry.

    Music URLs are flagged with 'is_music_url' in the smuggled data before
    the actual extraction runs.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
3612
# Splits a URL into <pre> (the part matched by _VALID_URL), an optional
# <tab> path segment and the remaining <post>. The conditional group means
# <tab> is only tried when _VALID_URL's <channel_type> group matched.
_url_re = re.compile(
    r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
3614
def __real_extract(self, url, smuggled_data):
    """Normalise *url*, download the page and dispatch on what it contains.

    Channel home URLs are redirected to the /videos tab unless the
    'no-youtube-channel-redirect' compat option is set. For music URLs,
    'VL' ids are redirected to the equivalent playlist and /browse/ URLs to
    /channel/. Returns a playlist result for tab pages and watch-page
    playlists, or a url result for a single video. Raises ExtractorError
    when the page cannot be recognised.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Unmatched groups become '' so they can be used as strings
        groups = self._url_re.match(url).groupdict()
        return dict((k, '' if v is None else v) for k, v in groups.items())

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if (mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]
                and 'no-youtube-channel-redirect' not in compat_opts):
            if not mobj['not_channel'] and item_id[:2] == 'UC':
                # Topic channels don't have /videos. Use the equivalent playlist instead
                self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                pl_id = 'UU%s' % item_id[2:]
                pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                try:
                    pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                    for alert_type, alert_message in self._extract_alerts(pl_data):
                        # Compare case-insensitively, as _report_alerts does,
                        # so an error alert triggers the fallback whatever
                        # its casing
                        if alert_type.lower() == 'error':
                            raise ExtractorError('Youtube said: %s' % alert_message)
                    item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                except ExtractorError:
                    self.report_warning('The playlist gave error. Falling back to channel URL')
            else:
                self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3719
c5e8d7af 3720
class YoutubePlaylistIE(InfoExtractor):
    # Matches bare playlist ids and URLs carrying a ``list=`` parameter, and
    # forwards them to YoutubeTabIE as a canonical /playlist URL.
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a video id."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the /playlist URL handled by YoutubeTabIE.

        The original query string is kept when there is one; otherwise only
        ``list=<playlist id>`` is sent. Music URLs are flagged through
        smuggled data.
        """
        playlist_id = self._match_id(url)
        # NOTE(review): this class derives from InfoExtractor, so the helper
        # is looked up on YoutubeBaseInfoExtractor instead of self. This
        # assumes is_music_url is a static method defined there - confirm.
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 3803
3804
class YoutubeYtBeIE(InfoExtractor):
    # Handles youtu.be short links that also carry a playlist id.
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link as a watch URL and delegate to YoutubeTabIE."""
        match = re.match(self._VALID_URL, url)
        video_id, playlist_id = match.group('id'), match.group('playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3843
3844
class YoutubeYtUserIE(InfoExtractor):
    # Expands the "ytuser:<name>" shorthand into the user's channel URL.
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/<name> URL."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3858
b05654f0 3859
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Maps the ":ytfav" family of keywords onto the "LL" playlist.
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the playlist?list=LL URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3877
3878
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* videos matching *query*, one result page at a time.

        Paging stops when a request returns nothing, a page has no result
        sections, or no continuation token is found.
        """
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        for page_num in itertools.count(1):
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for section in sections:
                if continuation_token is None:
                    continuation_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or []:
                    if not isinstance(item, dict):
                        continue
                    video = item.get('videoRenderer')
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation_token:
                break
            request['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
75dff0ee 3948
c9ae7b95 3949
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search machinery as YoutubeSearchIE; only the keyword and the
    # 'params' value sent with the request differ.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3955
c9ae7b95 3956
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    @classmethod
    def _make_valid_url(cls):
        # Use the results-page URL pattern as-is
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by a results-page URL."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # Forward the URL's 'sp' value (empty string when absent)
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3982
3983
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Shared base for the feed extractors.
    A subclass has to provide _FEED_NAME; it is used both for IE_NAME
    and for the feed URL that extraction is delegated to.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        # The matched URL/keyword is ignored; the feed name alone decides
        # which page YoutubeTabIE is asked to handle.
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4003
4004
class YoutubeWatchLaterIE(InfoExtractor):
    # Shortcut extractor: ":ytwatchlater" is forwarded to the "WL" playlist
    # URL, which is then handled by YoutubeTabIE.
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {'url': ':ytwatchlater', 'only_matching': True},
    ]

    def _real_extract(self, url):
        target = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(target, ie=YoutubeTabIE.ie_key())
3462ffa8 4017
4018
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the ":ytrec"/":ytrecommended" keywords as well as the bare
    # youtube.com front page URL.
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _FEED_NAME = 'recommended'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]
1ed5b5c9 4033
1ed5b5c9 4034
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Keyword-only extractor for the 'subscriptions' feed.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]
1ed5b5c9 4046
1ed5b5c9 4047
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Keyword-only extractor for the 'history' feed.
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
1ed5b5c9
JMF
4056
4057
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that end right after a single
    # non-video query parameter (or after "watch?" itself) and turns them
    # into an explanatory error instead of attempting extraction.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        # Always fails: the message explains the likely shell-quoting mistake.
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
4105
4106
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose "v" value is only 1-10 characters long and
    # reports them as truncated instead of attempting extraction.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        # Always fails; the partial id is included to make the message useful.
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)