]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
Write thumbnail of playlist
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Parse the query component of *url* and return the parsed parameters.

    The URL is split with ``compat_urlparse.urlparse`` and its ``query``
    part is handed to ``compat_urlparse.parse_qs``; the result of that
    call is returned unchanged.
    """
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self._downloader.params.get('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not None) so the result matches the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """Submit one step of the sign-in flow and return the decoded JSON reply."""
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything that precedes the first '[' of the response
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self.report_warning(message)

        # The same continue URL is embedded in both the lookup and the challenge request
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised so that the prefix is kept for every message:
            # '%' binds tighter than the conditional expression.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence fix as for the password warning above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _initialize_consent(self):
        """Set a 'YES' CONSENT cookie for .youtube.com unless one is already in place.

        Nothing is done when a '__Secure-3PSID' cookie exists or the CONSENT
        cookie already contains 'YES'. The numeric id of a 'PENDING+<id>'
        cookie is reused when present; otherwise a random three-digit id is used.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Prepare the consent cookie and, when a downloader is attached, try to log in."""
        self._initialize_consent()
        if self._downloader is None:
            return
        # The login result is not acted upon here; failures are reported by _login itself
        self._login()

    _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
    _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """Build the 'SAPISIDHASH <time>_<sha1>' value from the SAPISID cookie.

        The digest is the SHA-1 of '<time> <SAPISID> https://www.youtube.com'.
        Returns None when no SAPISID cookie is available.
        """
        sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
        return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None):
        """Call the youtubei/v1 endpoint *ep* and return the decoded JSON response.

        *query* is merged into the JSON request body next to the client
        context (*context*, or the one from _extract_context() when falsy).
        *headers* are applied on top of the generated API headers, and
        *api_key* falls back to _extract_api_key().
        """
        data = {'context': context or self._extract_context()}
        data.update(query)
        real_headers = self._generate_api_headers()
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def _extract_api_key(self, ytcfg=None):
        """Return INNERTUBE_API_KEY from *ytcfg*, or the built-in default key."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY

    def _extract_yt_initial_data(self, video_id, webpage):
        """Locate the ytInitialData object in *webpage* and return it parsed as JSON."""
        return self._parse_json(
            self._search_regex(
                # Try the pattern anchored by a known boundary first, then the bare one
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return ID_TOKEN from the page's ytcfg, or from a regex over *webpage*; None if absent."""
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(data):
        """
        Extract syncId required to download private playlists of secondary channels
        @param data Either response or ytcfg
        """
        sync_ids = (try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]
        # ytcfg includes channel_syncid if on secondary channel.
        # try_get (instead of data.get) keeps this safe when data is None or not a dict.
        return try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)

    def _extract_ytcfg(self, video_id, webpage):
        """Return the object passed to ytcfg.set(...) in *webpage* as a dict ({} if unavailable)."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def __extract_client_version(self, ytcfg):
        """Return INNERTUBE_CLIENT_VERSION from *ytcfg*, or the built-in web client version."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION

    def _extract_context(self, ytcfg=None):
        """Return INNERTUBE_CONTEXT from *ytcfg*, or a minimal client context built from defaults."""
        context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
        if context:
            return context

        # Recreate the client context (required)
        client_version = self.__extract_client_version(ytcfg)
        client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
        context = {
            'client': {
                'clientName': client_name,
                'clientVersion': client_version,
            }
        }
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
        """Build the HTTP headers for an API request.

        Client name and version are always set. The identity token, account
        sync id and visitor data headers are added only when the matching
        argument is truthy, and the Authorization / X-Origin pair only when
        a SAPISID hash can be generated.
        """
        headers = {
            'X-YouTube-Client-Name': '1',
            'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
            headers['X-Goog-AuthUser'] = 0
        if visitor_data:
            headers['x-goog-visitor-id'] = visitor_data
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = 'https://www.youtube.com'
        return headers

    @staticmethod
    def is_music_url(url):
        """Return True if *url* starts with http(s)://music.youtube.com/."""
        return re.match(r'https?://music\.youtube\.com/', url) is not None

    def _extract_video(self, renderer):
        """Convert a video renderer dict into a 'url' result entry handled by YoutubeIE."""
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is stripped first so the leading run of digits and commas can be matched
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
436
0c148415 437
360e1ca5 438class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 439 IE_DESC = 'YouTube.com'
bc2ca1bb 440 _INVIDIOUS_SITES = (
441 # invidious-redirect websites
442 r'(?:www\.)?redirect\.invidious\.io',
443 r'(?:(?:www|dev)\.)?invidio\.us',
444 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
445 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 446 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 447 r'(?:(?:www|au)\.)?ytprivate\.com',
448 r'(?:www\.)?invidious\.namazso\.eu',
449 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 450 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
451 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
452 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
453 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
454 # youtube-dl invidious instances list
455 r'(?:(?:www|no)\.)?invidiou\.sh',
456 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
457 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 458 r'(?:www\.)?invidious\.mastodon\.host',
459 r'(?:www\.)?invidious\.zapashcanon\.fr',
460 r'(?:www\.)?invidious\.kavin\.rocks',
201c1459 461 r'(?:www\.)?invidious\.tinfoil-hat\.net',
462 r'(?:www\.)?invidious\.himiko\.cloud',
463 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 464 r'(?:www\.)?invidious\.tube',
465 r'(?:www\.)?invidiou\.site',
466 r'(?:www\.)?invidious\.site',
467 r'(?:www\.)?invidious\.xyz',
468 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 469 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 470 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 471 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 472 r'(?:www\.)?tube\.poal\.co',
473 r'(?:www\.)?tube\.connect\.cafe',
474 r'(?:www\.)?vid\.wxzm\.sx',
475 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 476 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 477 r'(?:www\.)?yewtu\.be',
478 r'(?:www\.)?yt\.elukerio\.org',
479 r'(?:www\.)?yt\.lelux\.fi',
480 r'(?:www\.)?invidious\.ggc-project\.de',
481 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 482 r'(?:www\.)?ytprivate\.com',
483 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 484 r'(?:www\.)?invidious\.toot\.koeln',
485 r'(?:www\.)?invidious\.fdn\.fr',
486 r'(?:www\.)?watch\.nettohikari\.com',
487 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
488 r'(?:www\.)?qklhadlycap4cnod\.onion',
489 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
490 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
491 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
492 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
493 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
494 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
495 )
cb7dfeea 496 _VALID_URL = r"""(?x)^
c5e8d7af 497 (
edb53e2d 498 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 499 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
500 (?:www\.)?deturl\.com/www\.youtube\.com|
501 (?:www\.)?pwnyoutube\.com|
502 (?:www\.)?hooktube\.com|
503 (?:www\.)?yourepeat\.com|
504 tube\.majestyc\.net|
505 %(invidious)s|
506 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
507 (?:.*?\#/)? # handle anchor (#/) redirect urls
508 (?: # the various things that can precede the ID:
ac7553d0 509 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 510 |(?: # or the v= param in all its forms
f7000f3a 511 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 512 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 513 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
514 v=
515 )
f4b05232 516 ))
cbaed4bb
S
517 |(?:
518 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
519 vid\.plus| # or vid.plus/xxxx
520 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 521 %(invidious)s
cbaed4bb 522 )/
edb53e2d 523 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 524 )
c5e8d7af 525 )? # all until now is optional -> you can pass the naked ID
201c1459 526 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 527 (?(1).+)? # if we found the ID, everything can follow
9297939e 528 (?:\#|$)""" % {
bc2ca1bb 529 'invidious': '|'.join(_INVIDIOUS_SITES),
530 }
e40c758c 531 _PLAYER_INFO_RE = (
cc2db878 532 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
533 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 534 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 535 )
2c62dc26 536 _formats = {
c2d3cb4c 537 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
538 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
539 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
540 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
541 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
542 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
543 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
544 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 545 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 546 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
547 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
548 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
549 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
550 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
551 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 552 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 553 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
554 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 555
556
557 # 3D videos
c2d3cb4c 558 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
559 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
560 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
561 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 562 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
563 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
564 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 565
96fb5605 566 # Apple HTTP Live Streaming
11f12195 567 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 568 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
569 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
570 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
571 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
572 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 573 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
574 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
575
576 # DASH mp4 video
d23028a8
S
577 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
578 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
579 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
580 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
581 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 582 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
583 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
584 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
585 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
586 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
587 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
588 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 589
f6f1fc92 590 # Dash mp4 audio
d23028a8
S
591 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
592 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
593 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
594 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
595 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
596 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
597 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
598
599 # Dash webm
d23028a8
S
600 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
601 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
602 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
603 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
604 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
605 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
606 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
607 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
608 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
609 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
610 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
611 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
612 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
613 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
614 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 615 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
616 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
617 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
618 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
619 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
620 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
621 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
622
623 # Dash webm audio
d23028a8
S
624 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
625 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 626
0857baad 627 # Dash webm audio with opus inside
d23028a8
S
628 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
629 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
630 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 631
ce6b9a2d
PH
632 # RTMP (unnamed)
633 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
634
635 # av01 video only formats sometimes served with "unknown" codecs
636 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
637 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
638 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
639 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 640 }
29f7c58a 641 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 642
fd5c4aab
S
643 _GEO_BYPASS = False
644
78caa52a 645 IE_NAME = 'youtube'
2eb88d95
PH
646 _TESTS = [
647 {
2d3d2997 648 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
649 'info_dict': {
650 'id': 'BaW_jenozKc',
651 'ext': 'mp4',
3867038a 652 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
653 'uploader': 'Philipp Hagemeister',
654 'uploader_id': 'phihag',
ec85ded8 655 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
656 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
657 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 658 'upload_date': '20121002',
3867038a 659 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 660 'categories': ['Science & Technology'],
3867038a 661 'tags': ['youtube-dl'],
556dbe7f 662 'duration': 10,
dbdaaa23 663 'view_count': int,
3e7c1224
PH
664 'like_count': int,
665 'dislike_count': int,
7c80519c 666 'start_time': 1,
297a564b 667 'end_time': 9,
2eb88d95 668 }
0e853ca4 669 },
fccd3771 670 {
4bc3a23e
PH
671 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
672 'note': 'Embed-only video (#1746)',
673 'info_dict': {
674 'id': 'yZIXLfi8CZQ',
675 'ext': 'mp4',
676 'upload_date': '20120608',
677 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
678 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
679 'uploader': 'SET India',
94bfcd23 680 'uploader_id': 'setindia',
ec85ded8 681 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 682 'age_limit': 18,
545cc85d 683 },
684 'skip': 'Private video',
fccd3771 685 },
11b56058 686 {
8bdd16b4 687 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
688 'note': 'Use the first video ID in the URL',
689 'info_dict': {
690 'id': 'BaW_jenozKc',
691 'ext': 'mp4',
3867038a 692 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
693 'uploader': 'Philipp Hagemeister',
694 'uploader_id': 'phihag',
ec85ded8 695 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 696 'upload_date': '20121002',
3867038a 697 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 698 'categories': ['Science & Technology'],
3867038a 699 'tags': ['youtube-dl'],
556dbe7f 700 'duration': 10,
dbdaaa23 701 'view_count': int,
11b56058
PM
702 'like_count': int,
703 'dislike_count': int,
34a7de29
S
704 },
705 'params': {
706 'skip_download': True,
707 },
11b56058 708 },
dd27fd17 709 {
2d3d2997 710 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
711 'note': '256k DASH audio (format 141) via DASH manifest',
712 'info_dict': {
713 'id': 'a9LDPn-MO4I',
714 'ext': 'm4a',
715 'upload_date': '20121002',
716 'uploader_id': '8KVIDEO',
ec85ded8 717 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
718 'description': '',
719 'uploader': '8KVIDEO',
720 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 721 },
4bc3a23e
PH
722 'params': {
723 'youtube_include_dash_manifest': True,
724 'format': '141',
4919603f 725 },
de3c7fe0 726 'skip': 'format 141 not served anymore',
dd27fd17 727 },
8bdd16b4 728 # DASH manifest with encrypted signature
729 {
730 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
731 'info_dict': {
732 'id': 'IB3lcPjvWLA',
733 'ext': 'm4a',
734 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
735 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
736 'duration': 244,
737 'uploader': 'AfrojackVEVO',
738 'uploader_id': 'AfrojackVEVO',
739 'upload_date': '20131011',
cc2db878 740 'abr': 129.495,
8bdd16b4 741 },
742 'params': {
743 'youtube_include_dash_manifest': True,
744 'format': '141/bestaudio[ext=m4a]',
745 },
746 },
aa79ac0c
PH
747 # Controversy video
748 {
749 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
750 'info_dict': {
751 'id': 'T4XJQO3qol8',
752 'ext': 'mp4',
556dbe7f 753 'duration': 219,
aa79ac0c 754 'upload_date': '20100909',
4fe54c12 755 'uploader': 'Amazing Atheist',
aa79ac0c 756 'uploader_id': 'TheAmazingAtheist',
ec85ded8 757 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 758 'title': 'Burning Everyone\'s Koran',
545cc85d 759 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 760 }
c522adb1 761 },
dd2d55f1 762 # Normal age-gate video (embed allowed)
c522adb1 763 {
2d3d2997 764 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
765 'info_dict': {
766 'id': 'HtVdAasjOgU',
767 'ext': 'mp4',
768 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 769 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 770 'duration': 142,
c522adb1
JMF
771 'uploader': 'The Witcher',
772 'uploader_id': 'WitcherGame',
ec85ded8 773 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 774 'upload_date': '20140605',
34952f09 775 'age_limit': 18,
c522adb1
JMF
776 },
777 },
8bdd16b4 778 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
779 # YouTube Red ad is not captured for creator
780 {
781 'url': '__2ABJjxzNo',
782 'info_dict': {
783 'id': '__2ABJjxzNo',
784 'ext': 'mp4',
785 'duration': 266,
786 'upload_date': '20100430',
787 'uploader_id': 'deadmau5',
788 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 789 'creator': 'deadmau5',
790 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 791 'uploader': 'deadmau5',
792 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 793 'alt_title': 'Some Chords',
8bdd16b4 794 },
795 'expected_warnings': [
796 'DASH manifest missing',
797 ]
798 },
067aa17e 799 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
800 {
801 'url': 'lqQg6PlCWgI',
802 'info_dict': {
803 'id': 'lqQg6PlCWgI',
804 'ext': 'mp4',
556dbe7f 805 'duration': 6085,
90227264 806 'upload_date': '20150827',
cbe2bd91 807 'uploader_id': 'olympic',
ec85ded8 808 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 809 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 810 'uploader': 'Olympic',
cbe2bd91
PH
811 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
812 },
813 'params': {
814 'skip_download': 'requires avconv',
e52a40ab 815 }
cbe2bd91 816 },
6271f1ca
PH
817 # Non-square pixels
818 {
819 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
820 'info_dict': {
821 'id': '_b-2C3KPAM0',
822 'ext': 'mp4',
823 'stretched_ratio': 16 / 9.,
556dbe7f 824 'duration': 85,
6271f1ca
PH
825 'upload_date': '20110310',
826 'uploader_id': 'AllenMeow',
ec85ded8 827 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 828 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 829 'uploader': '孫ᄋᄅ',
6271f1ca
PH
830 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
831 },
06b491eb
S
832 },
833 # url_encoded_fmt_stream_map is empty string
834 {
835 'url': 'qEJwOuvDf7I',
836 'info_dict': {
837 'id': 'qEJwOuvDf7I',
f57b7835 838 'ext': 'webm',
06b491eb
S
839 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
840 'description': '',
841 'upload_date': '20150404',
842 'uploader_id': 'spbelect',
843 'uploader': 'Наблюдатели Петербурга',
844 },
845 'params': {
846 'skip_download': 'requires avconv',
e323cf3f
S
847 },
848 'skip': 'This live event has ended.',
06b491eb 849 },
067aa17e 850 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
851 {
852 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
853 'info_dict': {
854 'id': 'FIl7x6_3R5Y',
eb6793ba 855 'ext': 'webm',
da77d856
S
856 'title': 'md5:7b81415841e02ecd4313668cde88737a',
857 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 858 'duration': 220,
da77d856
S
859 'upload_date': '20150625',
860 'uploader_id': 'dorappi2000',
ec85ded8 861 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 862 'uploader': 'dorappi2000',
eb6793ba 863 'formats': 'mincount:31',
da77d856 864 },
eb6793ba 865 'skip': 'not actual anymore',
2ee8f5d8 866 },
8a1a26ce
YCH
867 # DASH manifest with segment_list
868 {
869 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
870 'md5': '8ce563a1d667b599d21064e982ab9e31',
871 'info_dict': {
872 'id': 'CsmdDsKjzN8',
873 'ext': 'mp4',
17ee98e1 874 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
875 'uploader': 'Airtek',
876 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
877 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
878 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
879 },
880 'params': {
881 'youtube_include_dash_manifest': True,
882 'format': '135', # bestvideo
be49068d
S
883 },
884 'skip': 'This live event has ended.',
2ee8f5d8 885 },
cf7e015f
S
886 {
887 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 888 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 889 'info_dict': {
545cc85d 890 'id': 'jvGDaLqkpTg',
891 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
892 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
893 },
894 'playlist': [{
895 'info_dict': {
545cc85d 896 'id': 'jvGDaLqkpTg',
cf7e015f 897 'ext': 'mp4',
545cc85d 898 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
899 'description': 'md5:e03b909557865076822aa169218d6a5d',
900 'duration': 10643,
901 'upload_date': '20161111',
902 'uploader': 'Team PGP',
903 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
904 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
905 },
906 }, {
907 'info_dict': {
545cc85d 908 'id': '3AKt1R1aDnw',
cf7e015f 909 'ext': 'mp4',
545cc85d 910 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
911 'description': 'md5:e03b909557865076822aa169218d6a5d',
912 'duration': 10991,
913 'upload_date': '20161111',
914 'uploader': 'Team PGP',
915 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
916 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
917 },
918 }, {
919 'info_dict': {
545cc85d 920 'id': 'RtAMM00gpVc',
cf7e015f 921 'ext': 'mp4',
545cc85d 922 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
923 'description': 'md5:e03b909557865076822aa169218d6a5d',
924 'duration': 10995,
925 'upload_date': '20161111',
926 'uploader': 'Team PGP',
927 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
928 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
929 },
930 }, {
931 'info_dict': {
545cc85d 932 'id': '6N2fdlP3C5U',
cf7e015f 933 'ext': 'mp4',
545cc85d 934 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
935 'description': 'md5:e03b909557865076822aa169218d6a5d',
936 'duration': 10990,
937 'upload_date': '20161111',
938 'uploader': 'Team PGP',
939 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
940 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
941 },
942 }],
943 'params': {
944 'skip_download': True,
945 },
cbaed4bb 946 },
f9f49d87 947 {
067aa17e 948 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
949 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
950 'info_dict': {
951 'id': 'gVfLd0zydlo',
952 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
953 },
954 'playlist_count': 2,
be49068d 955 'skip': 'Not multifeed anymore',
f9f49d87 956 },
cbaed4bb 957 {
2d3d2997 958 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 959 'only_matching': True,
0e49d9a6 960 },
6d4fc66b 961 {
2d3d2997 962 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
963 'only_matching': True,
964 },
0e49d9a6 965 {
067aa17e 966 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 967 # Also tests cut-off URL expansion in video description (see
067aa17e
S
968 # https://github.com/ytdl-org/youtube-dl/issues/1892,
969 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
970 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
971 'info_dict': {
972 'id': 'lsguqyKfVQg',
973 'ext': 'mp4',
974 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 975 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 976 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 977 'duration': 133,
0e49d9a6
LL
978 'upload_date': '20151119',
979 'uploader_id': 'IronSoulElf',
ec85ded8 980 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 981 'uploader': 'IronSoulElf',
eb6793ba
S
982 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
983 'track': 'Dark Walk - Position Music',
984 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 985 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
986 },
987 'params': {
988 'skip_download': True,
989 },
990 },
61f92af1 991 {
067aa17e 992 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
993 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
994 'only_matching': True,
995 },
313dfc45
LL
996 {
997 # Video with yt:stretch=17:0
998 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
999 'info_dict': {
1000 'id': 'Q39EVAstoRM',
1001 'ext': 'mp4',
1002 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1003 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1004 'upload_date': '20151107',
1005 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1006 'uploader': 'CH GAMER DROID',
1007 },
1008 'params': {
1009 'skip_download': True,
1010 },
be49068d 1011 'skip': 'This video does not exist.',
313dfc45 1012 },
201c1459 1013 {
1014 # Video with incomplete 'yt:stretch=16:'
1015 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1016 'only_matching': True,
1017 },
7caf9830
S
1018 {
1019 # Video licensed under Creative Commons
1020 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1021 'info_dict': {
1022 'id': 'M4gD1WSo5mA',
1023 'ext': 'mp4',
1024 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1025 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1026 'duration': 721,
7caf9830
S
1027 'upload_date': '20150127',
1028 'uploader_id': 'BerkmanCenter',
ec85ded8 1029 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1030 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1031 'license': 'Creative Commons Attribution license (reuse allowed)',
1032 },
1033 'params': {
1034 'skip_download': True,
1035 },
1036 },
fd050249
S
1037 {
1038 # Channel-like uploader_url
1039 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1040 'info_dict': {
1041 'id': 'eQcmzGIKrzg',
1042 'ext': 'mp4',
1043 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1044 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1045 'duration': 4060,
fd050249 1046 'upload_date': '20151119',
eb6793ba 1047 'uploader': 'Bernie Sanders',
fd050249 1048 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1049 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1050 'license': 'Creative Commons Attribution license (reuse allowed)',
1051 },
1052 'params': {
1053 'skip_download': True,
1054 },
1055 },
040ac686
S
1056 {
1057 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1058 'only_matching': True,
7f29cf54
S
1059 },
1060 {
067aa17e 1061 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1062 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1063 'only_matching': True,
6496ccb4
S
1064 },
1065 {
1066 # Rental video preview
1067 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1068 'info_dict': {
1069 'id': 'uGpuVWrhIzE',
1070 'ext': 'mp4',
1071 'title': 'Piku - Trailer',
1072 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1073 'upload_date': '20150811',
1074 'uploader': 'FlixMatrix',
1075 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1076 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1077 'license': 'Standard YouTube License',
1078 },
1079 'params': {
1080 'skip_download': True,
1081 },
eb6793ba 1082 'skip': 'This video is not available.',
022a5d66 1083 },
12afdc2a
S
1084 {
1085 # YouTube Red video with episode data
1086 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1087 'info_dict': {
1088 'id': 'iqKdEhx-dD4',
1089 'ext': 'mp4',
1090 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1091 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1092 'duration': 2085,
12afdc2a
S
1093 'upload_date': '20170118',
1094 'uploader': 'Vsauce',
1095 'uploader_id': 'Vsauce',
1096 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1097 'series': 'Mind Field',
1098 'season_number': 1,
1099 'episode_number': 1,
1100 },
1101 'params': {
1102 'skip_download': True,
1103 },
1104 'expected_warnings': [
1105 'Skipping DASH manifest',
1106 ],
1107 },
c7121fa7
S
1108 {
1109 # The following content has been identified by the YouTube community
1110 # as inappropriate or offensive to some audiences.
1111 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1112 'info_dict': {
1113 'id': '6SJNVb0GnPI',
1114 'ext': 'mp4',
1115 'title': 'Race Differences in Intelligence',
1116 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1117 'duration': 965,
1118 'upload_date': '20140124',
1119 'uploader': 'New Century Foundation',
1120 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1121 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1122 },
1123 'params': {
1124 'skip_download': True,
1125 },
545cc85d 1126 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1127 },
022a5d66
S
1128 {
1129 # itag 212
1130 'url': '1t24XAntNCY',
1131 'only_matching': True,
fd5c4aab
S
1132 },
1133 {
1134 # geo restricted to JP
1135 'url': 'sJL6WA-aGkQ',
1136 'only_matching': True,
1137 },
cd5a74a2
S
1138 {
1139 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1140 'only_matching': True,
1141 },
bc2ca1bb 1142 {
1143 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1144 'only_matching': True,
1145 },
1146 {
1147 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1148 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1149 'only_matching': True,
1150 },
825cd268
RA
1151 {
1152 # DRM protected
1153 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1154 'only_matching': True,
4fe54c12
S
1155 },
1156 {
1157 # Video with unsupported adaptive stream type formats
1158 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1159 'info_dict': {
1160 'id': 'Z4Vy8R84T1U',
1161 'ext': 'mp4',
1162 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1163 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1164 'duration': 433,
1165 'upload_date': '20130923',
1166 'uploader': 'Amelia Putri Harwita',
1167 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1168 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1169 'formats': 'maxcount:10',
1170 },
1171 'params': {
1172 'skip_download': True,
1173 'youtube_include_dash_manifest': False,
1174 },
5429d6a9 1175 'skip': 'not actual anymore',
5caabd3c 1176 },
1177 {
822b9d9c 1178 # Youtube Music Auto-generated description
5caabd3c 1179 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1180 'info_dict': {
1181 'id': 'MgNrAu2pzNs',
1182 'ext': 'mp4',
1183 'title': 'Voyeur Girl',
1184 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1185 'upload_date': '20190312',
5429d6a9
S
1186 'uploader': 'Stephen - Topic',
1187 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1188 'artist': 'Stephen',
1189 'track': 'Voyeur Girl',
1190 'album': 'it\'s too much love to know my dear',
1191 'release_date': '20190313',
1192 'release_year': 2019,
1193 },
1194 'params': {
1195 'skip_download': True,
1196 },
1197 },
66b48727
RA
1198 {
1199 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1200 'only_matching': True,
1201 },
011e75e6
S
1202 {
1203 # invalid -> valid video id redirection
1204 'url': 'DJztXj2GPfl',
1205 'info_dict': {
1206 'id': 'DJztXj2GPfk',
1207 'ext': 'mp4',
1208 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1209 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1210 'upload_date': '20090125',
1211 'uploader': 'Prochorowka',
1212 'uploader_id': 'Prochorowka',
1213 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1214 'artist': 'Panjabi MC',
1215 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1216 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1217 },
1218 'params': {
1219 'skip_download': True,
1220 },
545cc85d 1221 'skip': 'Video unavailable',
ea74e00b
DP
1222 },
1223 {
1224 # empty description results in an empty string
1225 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1226 'info_dict': {
1227 'id': 'x41yOUIvK2k',
1228 'ext': 'mp4',
1229 'title': 'IMG 3456',
1230 'description': '',
1231 'upload_date': '20170613',
1232 'uploader_id': 'ElevageOrVert',
1233 'uploader': 'ElevageOrVert',
1234 },
1235 'params': {
1236 'skip_download': True,
1237 },
1238 },
a0566bbf 1239 {
29f7c58a 1240 # with '};' inside yt initial data (see [1])
1241 # see [2] for an example with '};' inside ytInitialPlayerResponse
1242 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1243 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1244 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1245 'info_dict': {
1246 'id': 'CHqg6qOn4no',
1247 'ext': 'mp4',
1248 'title': 'Part 77 Sort a list of simple types in c#',
1249 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1250 'upload_date': '20130831',
1251 'uploader_id': 'kudvenkat',
1252 'uploader': 'kudvenkat',
1253 },
1254 'params': {
1255 'skip_download': True,
1256 },
1257 },
29f7c58a 1258 {
1259 # another example of '};' in ytInitialData
1260 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1261 'only_matching': True,
1262 },
1263 {
1264 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1265 'only_matching': True,
1266 },
545cc85d 1267 {
cc2db878 1268 # https://github.com/ytdl-org/youtube-dl/pull/28094
1269 'url': 'OtqTfy26tG0',
1270 'info_dict': {
1271 'id': 'OtqTfy26tG0',
1272 'ext': 'mp4',
1273 'title': 'Burn Out',
1274 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1275 'upload_date': '20141120',
1276 'uploader': 'The Cinematic Orchestra - Topic',
1277 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1278 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1279 'artist': 'The Cinematic Orchestra',
1280 'track': 'Burn Out',
1281 'album': 'Every Day',
1282 'release_data': None,
1283 'release_year': None,
1284 },
1285 'params': {
1286 'skip_download': True,
1287 },
545cc85d 1288 },
bc2ca1bb 1289 {
1290 # controversial video, only works with bpctr when authenticated with cookies
1291 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1292 'only_matching': True,
1293 },
f7ad7160 1294 {
1295 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1296 'url': 'cBvYw8_A0vQ',
1297 'info_dict': {
1298 'id': 'cBvYw8_A0vQ',
1299 'ext': 'mp4',
1300 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1301 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1302 'upload_date': '20201120',
1303 'uploader': 'Walk around Japan',
1304 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1305 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1306 },
1307 'params': {
1308 'skip_download': True,
1309 },
0fb983f6 1310 }, {
1311 # Has multiple audio streams
1312 'url': 'WaOKSUlf4TM',
1313 'only_matching': True
9297939e 1314 }, {
1315 # Requires Premium: has format 141 when requested using YTM url
1316 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1317 'only_matching': True
1318 }, {
120916da 1319 # multiple subtitles with same lang_code
1320 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1321 'only_matching': True,
1322 },
2eb88d95
PH
1323 ]
1324
@classmethod
def suitable(cls, url):
    """Return whether this extractor accepts *url*.

    URLs whose query string carries a non-empty ``list`` parameter are
    rejected outright; every other URL is decided by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1334
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create two empty caches.

    ``_code_cache`` is keyed by player id and ``_player_cache`` by
    ``(player_url, signature cache id)``; both are filled lazily by the
    signature helpers of this class.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    self._code_cache, self._player_cache = {}, {}
e0df6211 1339
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature

    The result is the length of every dot-separated part of
    *example_sig*, joined again with dots.
    """
    part_lengths = [compat_str(len(piece)) for piece in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1343
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id found in *player_url*.

    The patterns in ``cls._PLAYER_INFO_RE`` are tried in order and the
    ``id`` group of the first one that matches is returned.

    Raises ExtractorError if none of the patterns matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c
S
1353
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms an encrypted signature string.

    A previously stored index list is looked up in the downloader cache
    first. On a miss the player code is downloaded (at most once per
    player id for this instance), parsed, and the resulting index list is
    stored for later runs.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    # The id is used as a cache key, so it must not contain path separators
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        # The spec lists, per output position, the input index to copy
        return lambda s: ''.join(s[i] for i in stored_spec)

    if player_id not in self._code_cache:
        self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    res = self._parse_sig_js(self._code_cache[player_id])

    # Feed a string whose characters encode their own positions through
    # the function to recover the permutation it applies.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    new_spec = [ord(ch) for ch in res(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, new_spec)
    return res
1380
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function *func*.

    *func* is probed with a string whose characters encode their own
    positions; the resulting index sequence is rendered as a sum of
    ``s[...]`` index and slice expressions and written via ``to_screen``.
    """
    def _slice_expr(first, last, stride):
        # Render the run first..last (inclusive, walking by stride) as a slice
        lower = '' if first == 0 else str(first)
        stop = last + stride
        upper = ':%d' % stop if stop >= 0 else ':'
        stride_part = '' if stride == 1 else ':%d' % stride
        return 's[%s%s%s]' % (lower, upper, stride_part)

    def _sig_terms(indices):
        stride = None
        # Quelch pyflakes warnings - run_start will be set when stride is set
        run_start = '(Never used)'
        for cur, before in zip(indices[1:], indices[:-1]):
            delta = cur - before
            if stride is not None:
                # Inside a run: keep going while the stride holds,
                # otherwise emit the finished run.
                if delta != stride:
                    yield _slice_expr(run_start, before, stride)
                    stride = None
            elif delta in [-1, 1]:
                stride = delta
                run_start = before
            else:
                yield 's[%d]' % before
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield _slice_expr(run_start, cur, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(_sig_terms(index_spec))
    part_lengths = ', '.join(
        compat_str(len(piece)) for piece in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1419
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable wrapping the signature function found in *jscode*.

    The function name is located with the first matching pattern below,
    the function is extracted with JSInterpreter, and the returned
    callable invokes it with its single argument wrapped in a list.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(')
    sig_func_name = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    sig_func = JSInterpreter(jscode).extract_function(sig_func_name)

    def run_signature(s):
        # The extracted function takes its arguments as a list
        return sig_func([s])

    return run_signature
1443
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature

    *player_url* may be protocol-relative or site-relative; it is made
    absolute before use. Signature functions are cached per
    (player URL, signature shape) on this instance.

    Raises ExtractorError when *player_url* is None, or wrapping any
    exception raised while extracting or applying the function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        # Include the full traceback in the message of the wrapping error
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1470
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL so the video counts as watched.

    The URL is read from
    ``player_response['playbackTracking']['videostatsPlaybackUrl']['baseUrl']``.
    If it is missing or not a valid URL, nothing happens. The request is
    non-fatal: a failed download does not raise.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # Pick 16 characters uniformly. The previous randint(0, 256) & 63 had
    # an inclusive upper bound of 256, which slightly favoured index 0.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    # doseq=True because parse_qs yields a list of values per key
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1495
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embeds found in *webpage* as a list of strings.

    Three sources are scanned, in this order: embedded-player markup
    (iframe/embed/object tags, ``data-video-url`` attributes and
    ``embedSWF(`` / ``new SWFObject(`` calls), lazyYT ``data-youtube-id``
    attributes, and the ``data-video_id`` attribute of the Wordpress
    "YouTube Video Importer" plugin. The first source yields URLs, the
    other two yield the raw attribute values.
    """
    # Embedded YouTube player
    # The embedSWF alternative used to read ``embedSWF\(?:\s*``, which
    # demands a literal ':' and so could not match ``embedSWF("...")``.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(video_id)
        for video_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of groups; the id is the last one
    entries.extend(m[-1] for m in matches)

    return entries
1527
@staticmethod
def _extract_url(webpage):
    """Return the first entry reported by _extract_urls, or None if there is none."""
    for first_url in YoutubeIE._extract_urls(webpage):
        return first_url
    return None
1532
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id captured by group 2 of cls._VALID_URL.

    Raises ExtractorError when *url* does not match the pattern.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1540
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a chapter list from the player-bar section of *data*.

    Each chapter ends where the next one starts; the last one ends at
    *duration*.  Chapters whose start or end time cannot be determined are
    skipped.  Returns None when *data* carries no chapter list.
    *video_id* is accepted but not used here.
    """
    def locate_chapters(x):
        overlay = x['playerOverlays']['playerOverlayRenderer']
        player_bar = overlay['decoratedPlayerBarRenderer']['decoratedPlayerBarRenderer']['playerBar']
        return player_bar['chapteredPlayerBarRenderer']['chapters']

    chapters_list = try_get(data, locate_chapters, list)
    if not chapters_list:
        return

    def start_seconds(entry):
        # timeRangeStartMillis is in milliseconds; scale converts to seconds
        millis = try_get(
            entry, lambda x: x['chapterRenderer']['timeRangeStartMillis'], int)
        return float_or_none(millis, scale=1000)

    total = len(chapters_list)
    result = []
    for index, entry in enumerate(chapters_list):
        begin = start_seconds(entry)
        if begin is None:
            continue
        following = index + 1
        if following < total:
            finish = start_seconds(chapters_list[following])
        else:
            finish = duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                entry, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1580
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find the JSON blob matched by *regex* in *webpage* and parse it.

    The pattern is first tried with the boundary suffix
    (_YT_INITIAL_BOUNDARY_RE) appended, then on its own.  If nothing
    matches, '{}' is parsed instead.  Parsing is non-fatal.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1585
d92f5d5a 1586 @staticmethod
1587 def parse_time_text(time_text):
1588 """
1589 Parse the comment time text
1590 time_text is in the format 'X units ago (edited)'
1591 """
1592 time_text_split = time_text.split(' ')
1593 if len(time_text_split) >= 3:
1594 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1595
a1c5d2ca
M
1596 @staticmethod
1597 def _join_text_entries(runs):
1598 text = None
1599 for run in runs:
1600 if not isinstance(run, dict):
1601 continue
1602 sub_text = try_get(run, lambda x: x['text'], compat_str)
1603 if sub_text:
1604 if not text:
1605 text = sub_text
1606 continue
1607 text += sub_text
1608 return text
1609
1610 def _extract_comment(self, comment_renderer, parent=None):
1611 comment_id = comment_renderer.get('commentId')
1612 if not comment_id:
1613 return
1614 comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
1615 text = self._join_text_entries(comment_text_runs) or ''
1616 comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
1617 time_text = self._join_text_entries(comment_time_text)
d92f5d5a 1618 timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
a1c5d2ca
M
1619 author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
1620 author_id = try_get(comment_renderer,
1621 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
1622 votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
1623 lambda x: x['likeCount']), compat_str)) or 0
1624 author_thumbnail = try_get(comment_renderer,
1625 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
1626
1627 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
1628 is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
a1c5d2ca
M
1629 return {
1630 'id': comment_id,
1631 'text': text,
d92f5d5a 1632 'timestamp': timestamp,
a1c5d2ca
M
1633 'time_text': time_text,
1634 'like_count': votes,
1635 'is_favorited': is_liked,
1636 'author': author,
1637 'author_id': author_id,
1638 'author_thumbnail': author_thumbnail,
1639 'author_is_uploader': author_is_uploader,
1640 'parent': parent or 'root'
1641 }
1642
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generate comments (and their replies) by following continuations.

    Starting from the continuation found in *root_continuation_data*, pages
    are requested from the comment_service_ajax endpoint until no further
    continuation is available.  Every dict produced by _extract_comment is
    yielded; replies are fetched by calling this method recursively with
    *parent* set to the id of the comment being replied to.

    On the first page of a top-level call (parent is None) the estimated
    total comment count, if found in the header, is yielded as a plain int
    before any comment.

    identity_token, account_syncid and ytcfg are used to build the request
    headers.  session_token_list is a one-element list holding the current
    session token; it is updated in place whenever a response carries an
    'xsrf_token'.  comment_counts is a shared mutable list of
    [comments so far, est. total comments, current comment thread #].

    Each page is retried up to the 'extractor_retries' param (default 3) on
    HTTP 500/503/404 or incomplete data; HTTP 413 is treated as the end of
    the comments.  Other failures raise ExtractorError.
    """

    def extract_thread(parent_renderer):
        # Yields each comment of one continuation page, followed directly by
        # its replies (fetched through a recursive _comment_entries call).
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            # Top-level page: restart the reply-thread counter
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # Top-level comments are wrapped in a commentThreadRenderer,
            # otherwise the commentRenderer is looked up directly on the item
            comment_renderer = try_get(
                comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                content, (lambda x: x['commentRenderer'], dict))

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    # Only the top-level call handles the header (count and sort menu)
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self._downloader.params.get('extractor_retries', 3)
        # count is the number of the current attempt; 0 is the initial try
        count = -1
        last_error = None

        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                # Progress note shown to the user for this request
                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                # Any other error, or retries exhausted: propagate
                raise
            else:
                # Keep the shared session token current for later requests
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                # The response is either at the top level or in the second list item
                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        # Prefer the visitor data of the latest response for the next headers
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                    # The estimated total is yielded as an int, ahead of the comment dicts
                    yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    # Skip this page's comments; the next request uses the
                    # sorted continuation instead
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            # Only the first known renderer of a page is processed
            break
1827
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
    """Collect all comments reachable from *contents*.

    For each entry, the first renderer whose key is a known comment
    renderer is handed to _comment_entries.  Ints coming out of that
    generator are taken as the estimated total (used only for the progress
    message); everything else is stored as a comment.

    Returns a dict with 'comments' and 'comment_count'.
    """
    known_entry_comment_renderers = (
        'itemSectionRenderer',
    )
    collected = []
    estimated_total = 0
    for entry in contents:
        for key, renderer in entry.items():
            if key in known_entry_comment_renderers:
                comment_iter = self._comment_entries(
                    renderer,
                    identity_token=self._extract_identity_token(webpage, item_id=video_id),
                    account_syncid=self._extract_account_syncid(ytcfg),
                    ytcfg=ytcfg,
                    session_token_list=[xsrf_token])

                for item in comment_iter:
                    if isinstance(item, int):
                        estimated_total = item
                    else:
                        collected.append(item)
                # Only the first known renderer of each entry is used
                break

    downloaded = len(collected)
    self.to_screen('Downloaded %d/%d comments' % (downloaded, estimated_total))
    return {
        'comments': collected,
        'comment_count': downloaded,
    }
1858
c5e8d7af 1859 def _real_extract(self, url):
cf7e015f 1860 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1861 video_id = self._match_id(url)
9297939e 1862
1863 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
1864
545cc85d 1865 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1866 webpage_url = base_url + 'watch?v=' + video_id
1867 webpage = self._download_webpage(
cce889b9 1868 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1869
9297939e 1870 def get_text(x):
1871 if not x:
1872 return
1873 text = x.get('simpleText')
1874 if text and isinstance(text, compat_str):
1875 return text
1876 runs = x.get('runs')
1877 if not isinstance(runs, list):
1878 return
1879 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
1880
1881 ytm_streaming_data = {}
1882 if is_music_url:
1883 # we are forcing to use parse_json because 141 only appeared in get_video_info.
1884 # el, c, cver, cplayer field required for 141(aac 256kbps) codec
1885 # maybe paramter of youtube music player?
1886 ytm_player_response = self._parse_json(try_get(compat_parse_qs(
1887 self._download_webpage(
1888 base_url + 'get_video_info', video_id,
1889 'Fetching youtube-music info webpage',
1890 'unable to download youtube-music info webpage', query={
1891 'video_id': video_id,
1892 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1893 'el': 'detailpage',
1894 'c': 'WEB_REMIX',
1895 'cver': '0.1',
1896 'cplayer': 'UNIPLAYER'
1897 }, fatal=False)),
1898 lambda x: x['player_response'][0],
1899 compat_str) or '{}', video_id)
1900 ytm_streaming_data = ytm_player_response.get('streamingData') or {}
1901
545cc85d 1902 player_response = None
1903 if webpage:
1904 player_response = self._extract_yt_initial_variable(
1905 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1906 video_id, 'initial player response')
f4f751af 1907
1908 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1909 if not player_response:
1910 player_response = self._call_api(
f4f751af 1911 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1912
1913 playability_status = player_response.get('playabilityStatus') or {}
1914 if playability_status.get('reason') == 'Sign in to confirm your age':
1915 pr = self._parse_json(try_get(compat_parse_qs(
1916 self._download_webpage(
1917 base_url + 'get_video_info', video_id,
1918 'Refetching age-gated info webpage',
1919 'unable to download video info webpage', query={
1920 'video_id': video_id,
7c60c33e 1921 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1922 }, fatal=False)),
1923 lambda x: x['player_response'][0],
1924 compat_str) or '{}', video_id)
1925 if pr:
1926 player_response = pr
1927
1928 trailer_video_id = try_get(
1929 playability_status,
1930 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1931 compat_str)
1932 if trailer_video_id:
1933 return self.url_result(
1934 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1935
545cc85d 1936 search_meta = (
1937 lambda x: self._html_search_meta(x, webpage, default=None)) \
1938 if webpage else lambda x: None
dbdaaa23 1939
545cc85d 1940 video_details = player_response.get('videoDetails') or {}
37357d21 1941 microformat = try_get(
545cc85d 1942 player_response,
1943 lambda x: x['microformat']['playerMicroformatRenderer'],
1944 dict) or {}
1945 video_title = video_details.get('title') \
1946 or get_text(microformat.get('title')) \
1947 or search_meta(['og:title', 'twitter:title', 'title'])
1948 video_description = video_details.get('shortDescription')
cf7e015f 1949
8fe10494 1950 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1951 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1952 multifeed_metadata_list = try_get(
1953 player_response,
1954 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1955 compat_str)
8fe10494
S
1956 if multifeed_metadata_list:
1957 entries = []
1958 feed_ids = []
1959 for feed in multifeed_metadata_list.split(','):
1960 # Unquote should take place before split on comma (,) since textual
1961 # fields may contain comma as well (see
067aa17e 1962 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1963 feed_data = compat_parse_qs(
1964 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1965
1966 def feed_entry(name):
545cc85d 1967 return try_get(
1968 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1969
1970 feed_id = feed_entry('id')
1971 if not feed_id:
1972 continue
1973 feed_title = feed_entry('title')
1974 title = video_title
1975 if feed_title:
1976 title += ' (%s)' % feed_title
8fe10494
S
1977 entries.append({
1978 '_type': 'url_transparent',
1979 'ie_key': 'Youtube',
1980 'url': smuggle_url(
545cc85d 1981 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1982 {'force_singlefeed': True}),
6b09401b 1983 'title': title,
8fe10494 1984 })
6b09401b 1985 feed_ids.append(feed_id)
8fe10494
S
1986 self.to_screen(
1987 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1988 % (', '.join(feed_ids), video_id))
545cc85d 1989 return self.playlist_result(
1990 entries, video_id, video_title, video_description)
8fe10494
S
1991 else:
1992 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1993
9297939e 1994 formats, itags, stream_ids = [], [], []
cc2db878 1995 itag_qualities = {}
545cc85d 1996 player_url = None
dca3ff4a 1997 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
9297939e 1998
545cc85d 1999 streaming_data = player_response.get('streamingData') or {}
2000 streaming_formats = streaming_data.get('formats') or []
2001 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2002 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2003 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2004
545cc85d 2005 for fmt in streaming_formats:
2006 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2007 continue
321bf820 2008
cc2db878 2009 itag = str_or_none(fmt.get('itag'))
9297939e 2010 audio_track = fmt.get('audioTrack') or {}
2011 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2012 if stream_id in stream_ids:
2013 continue
2014
cc2db878 2015 quality = fmt.get('quality')
2016 if itag and quality:
2017 itag_qualities[itag] = quality
2018 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2019 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2020 # number of fragment that would subsequently requested with (`&sq=N`)
2021 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2022 continue
2023
545cc85d 2024 fmt_url = fmt.get('url')
2025 if not fmt_url:
2026 sc = compat_parse_qs(fmt.get('signatureCipher'))
2027 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2028 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2029 if not (sc and fmt_url and encrypted_sig):
2030 continue
2031 if not player_url:
2032 if not webpage:
2033 continue
2034 player_url = self._search_regex(
2035 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
2036 webpage, 'player URL', fatal=False)
2037 if not player_url:
201e9eaa 2038 continue
545cc85d 2039 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2040 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2041 fmt_url += '&' + sp + '=' + signature
2042
545cc85d 2043 if itag:
2044 itags.append(itag)
9297939e 2045 stream_ids.append(stream_id)
2046
cc2db878 2047 tbr = float_or_none(
2048 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2049 dct = {
2050 'asr': int_or_none(fmt.get('audioSampleRate')),
2051 'filesize': int_or_none(fmt.get('contentLength')),
2052 'format_id': itag,
0fb983f6 2053 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2054 'fps': int_or_none(fmt.get('fps')),
2055 'height': int_or_none(fmt.get('height')),
dca3ff4a 2056 'quality': q(quality),
cc2db878 2057 'tbr': tbr,
545cc85d 2058 'url': fmt_url,
2059 'width': fmt.get('width'),
0fb983f6 2060 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2061 }
2062 mimetype = fmt.get('mimeType')
2063 if mimetype:
2064 mobj = re.match(
2065 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2066 if mobj:
2067 dct['ext'] = mimetype2ext(mobj.group(1))
2068 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2069 no_audio = dct.get('acodec') == 'none'
2070 no_video = dct.get('vcodec') == 'none'
2071 if no_audio:
2072 dct['vbr'] = tbr
2073 if no_video:
2074 dct['abr'] = tbr
2075 if no_audio or no_video:
545cc85d 2076 dct['downloader_options'] = {
2077 # Youtube throttles chunks >~10M
2078 'http_chunk_size': 10485760,
bf1317d2 2079 }
7c60c33e 2080 if dct.get('ext'):
2081 dct['container'] = dct['ext'] + '_dash'
545cc85d 2082 formats.append(dct)
2083
9297939e 2084 for sd in (streaming_data, ytm_streaming_data):
2085 hls_manifest_url = sd.get('hlsManifestUrl')
2086 if hls_manifest_url:
2087 for f in self._extract_m3u8_formats(
2088 hls_manifest_url, video_id, 'mp4', fatal=False):
2089 itag = self._search_regex(
2090 r'/itag/(\d+)', f['url'], 'itag', default=None)
2091 if itag:
2092 f['format_id'] = itag
545cc85d 2093 formats.append(f)
2094
1418a043 2095 if self._downloader.params.get('youtube_include_dash_manifest', True):
9297939e 2096 for sd in (streaming_data, ytm_streaming_data):
2097 dash_manifest_url = sd.get('dashManifestUrl')
2098 if dash_manifest_url:
2099 for f in self._extract_mpd_formats(
2100 dash_manifest_url, video_id, fatal=False):
2101 itag = f['format_id']
2102 if itag in itags:
2103 continue
2104 if itag in itag_qualities:
2105 # Not actually usefull since the sorting is already done with "quality,res,fps,codec"
2106 # but kept to maintain feature parity (and code similarity) with youtube-dl
2107 # Remove if this causes any issues with sorting in future
2108 f['quality'] = q(itag_qualities[itag])
2109 filesize = int_or_none(self._search_regex(
2110 r'/clen/(\d+)', f.get('fragment_base_url')
2111 or f['url'], 'file size', default=None))
2112 if filesize:
2113 f['filesize'] = filesize
2114 formats.append(f)
bf1317d2 2115
545cc85d 2116 if not formats:
63ad4d43 2117 if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2118 self.raise_no_formats(
545cc85d 2119 'This video is DRM protected.', expected=True)
2120 pemr = try_get(
2121 playability_status,
2122 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2123 dict) or {}
2124 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2125 subreason = pemr.get('subreason')
2126 if subreason:
2127 subreason = clean_html(get_text(subreason))
2128 if subreason == 'The uploader has not made this video available in your country.':
2129 countries = microformat.get('availableCountries')
2130 if not countries:
2131 regions_allowed = search_meta('regionsAllowed')
2132 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2133 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2134 reason += '\n' + subreason
2135 if reason:
b7da73eb 2136 self.raise_no_formats(reason, expected=True)
bf1317d2 2137
545cc85d 2138 self._sort_formats(formats)
bf1317d2 2139
545cc85d 2140 keywords = video_details.get('keywords') or []
2141 if not keywords and webpage:
2142 keywords = [
2143 unescapeHTML(m.group('content'))
2144 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2145 for keyword in keywords:
2146 if keyword.startswith('yt:stretch='):
201c1459 2147 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2148 if mobj:
2149 # NB: float is intentional for forcing float division
2150 w, h = (float(v) for v in mobj.groups())
2151 if w > 0 and h > 0:
2152 ratio = w / h
2153 for f in formats:
2154 if f.get('vcodec') != 'none':
2155 f['stretched_ratio'] = ratio
2156 break
6449cd80 2157
545cc85d 2158 thumbnails = []
2159 for container in (video_details, microformat):
2160 for thumbnail in (try_get(
2161 container,
2162 lambda x: x['thumbnail']['thumbnails'], list) or []):
2163 thumbnail_url = thumbnail.get('url')
2164 if not thumbnail_url:
bf1317d2 2165 continue
1988fab7 2166 # Sometimes youtube gives a wrong thumbnail URL. See:
2167 # https://github.com/yt-dlp/yt-dlp/issues/233
2168 # https://github.com/ytdl-org/youtube-dl/issues/28023
2169 if 'maxresdefault' in thumbnail_url:
2170 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2171 thumbnails.append({
2172 'height': int_or_none(thumbnail.get('height')),
2173 'url': thumbnail_url,
2174 'width': int_or_none(thumbnail.get('width')),
2175 })
2176 if thumbnails:
2177 break
a6211d23 2178 else:
545cc85d 2179 thumbnail = search_meta(['og:image', 'twitter:image'])
2180 if thumbnail:
2181 thumbnails = [{'url': thumbnail}]
2182
2183 category = microformat.get('category') or search_meta('genre')
2184 channel_id = video_details.get('channelId') \
2185 or microformat.get('externalChannelId') \
2186 or search_meta('channelId')
2187 duration = int_or_none(
2188 video_details.get('lengthSeconds')
2189 or microformat.get('lengthSeconds')) \
2190 or parse_duration(search_meta('duration'))
2191 is_live = video_details.get('isLive')
2192 owner_profile_url = microformat.get('ownerProfileUrl')
2193
2194 info = {
2195 'id': video_id,
2196 'title': self._live_title(video_title) if is_live else video_title,
2197 'formats': formats,
2198 'thumbnails': thumbnails,
2199 'description': video_description,
2200 'upload_date': unified_strdate(
2201 microformat.get('uploadDate')
2202 or search_meta('uploadDate')),
2203 'uploader': video_details['author'],
2204 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2205 'uploader_url': owner_profile_url,
2206 'channel_id': channel_id,
2207 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2208 'duration': duration,
2209 'view_count': int_or_none(
2210 video_details.get('viewCount')
2211 or microformat.get('viewCount')
2212 or search_meta('interactionCount')),
2213 'average_rating': float_or_none(video_details.get('averageRating')),
2214 'age_limit': 18 if (
2215 microformat.get('isFamilySafe') is False
2216 or search_meta('isFamilyFriendly') == 'false'
2217 or search_meta('og:restrictions:age') == '18+') else 0,
2218 'webpage_url': webpage_url,
2219 'categories': [category] if category else None,
2220 'tags': keywords,
2221 'is_live': is_live,
2222 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2223 'was_live': video_details.get('isLiveContent'),
545cc85d 2224 }
b477fc13 2225
545cc85d 2226 pctr = try_get(
2227 player_response,
2228 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2229 subtitles = {}
2230 if pctr:
774d79cc 2231 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2232 lang_subs = container.setdefault(lang_code, [])
545cc85d 2233 for fmt in self._SUBTITLE_FORMATS:
2234 query.update({
2235 'fmt': fmt,
2236 })
2237 lang_subs.append({
2238 'ext': fmt,
2239 'url': update_url_query(base_url, query),
774d79cc 2240 'name': sub_name,
545cc85d 2241 })
7e72694b 2242
545cc85d 2243 for caption_track in (pctr.get('captionTracks') or []):
2244 base_url = caption_track.get('baseUrl')
2245 if not base_url:
2246 continue
2247 if caption_track.get('kind') != 'asr':
120916da 2248 lang_code = (
2249 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2250 or caption_track.get('languageCode'))
545cc85d 2251 if not lang_code:
2252 continue
2253 process_language(
774d79cc 2254 subtitles, base_url, lang_code,
2255 try_get(caption_track, lambda x: x.get('name').get('simpleText')),
2256 {})
545cc85d 2257 continue
2258 automatic_captions = {}
2259 for translation_language in (pctr.get('translationLanguages') or []):
2260 translation_language_code = translation_language.get('languageCode')
2261 if not translation_language_code:
2262 continue
2263 process_language(
2264 automatic_captions, base_url, translation_language_code,
774d79cc 2265 try_get(translation_language, lambda x: x['languageName']['simpleText']),
545cc85d 2266 {'tlang': translation_language_code})
2267 info['automatic_captions'] = automatic_captions
2268 info['subtitles'] = subtitles
7e72694b 2269
545cc85d 2270 parsed_url = compat_urllib_parse_urlparse(url)
2271 for component in [parsed_url.fragment, parsed_url.query]:
2272 query = compat_parse_qs(component)
2273 for k, v in query.items():
2274 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2275 d_k += '_time'
2276 if d_k not in info and k in s_ks:
2277 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2278
2279 # Youtube Music Auto-generated description
822b9d9c 2280 if video_description:
38d70284 2281 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2282 if mobj:
822b9d9c
RA
2283 release_year = mobj.group('release_year')
2284 release_date = mobj.group('release_date')
2285 if release_date:
2286 release_date = release_date.replace('-', '')
2287 if not release_year:
545cc85d 2288 release_year = release_date[:4]
2289 info.update({
2290 'album': mobj.group('album'.strip()),
2291 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2292 'track': mobj.group('track').strip(),
2293 'release_date': release_date,
cc2db878 2294 'release_year': int_or_none(release_year),
545cc85d 2295 })
7e72694b 2296
545cc85d 2297 initial_data = None
2298 if webpage:
2299 initial_data = self._extract_yt_initial_variable(
2300 webpage, self._YT_INITIAL_DATA_RE, video_id,
2301 'yt initial data')
2302 if not initial_data:
2303 initial_data = self._call_api(
f4f751af 2304 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2305
2306 if not is_live:
2307 try:
2308 # This will error if there is no livechat
2309 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2310 info['subtitles']['live_chat'] = [{
394dcd44 2311 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2312 'video_id': video_id,
2313 'ext': 'json',
2314 'protocol': 'youtube_live_chat_replay',
2315 }]
2316 except (KeyError, IndexError, TypeError):
2317 pass
2318
2319 if initial_data:
2320 chapters = self._extract_chapters_from_json(
2321 initial_data, video_id, duration)
2322 if not chapters:
2323 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2324 contents = try_get(
2325 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2326 list)
2327 if not contents:
2328 continue
2329
2330 def chapter_time(mmlir):
2331 return parse_duration(
2332 get_text(mmlir.get('timeDescription')))
2333
2334 chapters = []
2335 for next_num, content in enumerate(contents, start=1):
2336 mmlir = content.get('macroMarkersListItemRenderer') or {}
2337 start_time = chapter_time(mmlir)
2338 end_time = chapter_time(try_get(
2339 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2340 if next_num < len(contents) else duration
2341 if start_time is None or end_time is None:
2342 continue
2343 chapters.append({
2344 'start_time': start_time,
2345 'end_time': end_time,
2346 'title': get_text(mmlir.get('title')),
2347 })
2348 if chapters:
2349 break
2350 if chapters:
2351 info['chapters'] = chapters
2352
2353 contents = try_get(
2354 initial_data,
2355 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2356 list) or []
2357 for content in contents:
2358 vpir = content.get('videoPrimaryInfoRenderer')
2359 if vpir:
2360 stl = vpir.get('superTitleLink')
2361 if stl:
2362 stl = get_text(stl)
2363 if try_get(
2364 vpir,
2365 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2366 info['location'] = stl
2367 else:
2368 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2369 if mobj:
2370 info.update({
2371 'series': mobj.group(1),
2372 'season_number': int(mobj.group(2)),
2373 'episode_number': int(mobj.group(3)),
2374 })
2375 for tlb in (try_get(
2376 vpir,
2377 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2378 list) or []):
2379 tbr = tlb.get('toggleButtonRenderer') or {}
2380 for getter, regex in [(
2381 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2382 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2383 lambda x: x['accessibility'],
2384 lambda x: x['accessibilityData']['accessibilityData'],
2385 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2386 label = (try_get(tbr, getter, dict) or {}).get('label')
2387 if label:
2388 mobj = re.match(regex, label)
2389 if mobj:
2390 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2391 break
2392 sbr_tooltip = try_get(
2393 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2394 if sbr_tooltip:
2395 like_count, dislike_count = sbr_tooltip.split(' / ')
2396 info.update({
2397 'like_count': str_to_int(like_count),
2398 'dislike_count': str_to_int(dislike_count),
2399 })
2400 vsir = content.get('videoSecondaryInfoRenderer')
2401 if vsir:
2402 info['channel'] = get_text(try_get(
2403 vsir,
2404 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2405 dict))
545cc85d 2406 rows = try_get(
2407 vsir,
2408 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2409 list) or []
2410 multiple_songs = False
2411 for row in rows:
2412 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2413 multiple_songs = True
2414 break
2415 for row in rows:
2416 mrr = row.get('metadataRowRenderer') or {}
2417 mrr_title = mrr.get('title')
2418 if not mrr_title:
2419 continue
2420 mrr_title = get_text(mrr['title'])
2421 mrr_contents_text = get_text(mrr['contents'][0])
2422 if mrr_title == 'License':
2423 info['license'] = mrr_contents_text
2424 elif not multiple_songs:
2425 if mrr_title == 'Album':
2426 info['album'] = mrr_contents_text
2427 elif mrr_title == 'Artist':
2428 info['artist'] = mrr_contents_text
2429 elif mrr_title == 'Song':
2430 info['track'] = mrr_contents_text
2431
2432 fallbacks = {
2433 'channel': 'uploader',
2434 'channel_id': 'uploader_id',
2435 'channel_url': 'uploader_url',
2436 }
2437 for to, frm in fallbacks.items():
2438 if not info.get(to):
2439 info[to] = info.get(frm)
2440
2441 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2442 v = info.get(s_k)
2443 if v:
2444 info[d_k] = v
b84071c0 2445
c224251a
M
2446 is_private = bool_or_none(video_details.get('isPrivate'))
2447 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2448 is_membersonly = None
b28f8d24 2449 is_premium = None
c224251a
M
2450 if initial_data and is_private is not None:
2451 is_membersonly = False
b28f8d24 2452 is_premium = False
c224251a
M
2453 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2454 for content in contents or []:
2455 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2456 for badge in badges or []:
2457 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2458 if label.lower() == 'members only':
2459 is_membersonly = True
2460 break
b28f8d24
M
2461 elif label.lower() == 'premium':
2462 is_premium = True
2463 break
2464 if is_membersonly or is_premium:
c224251a
M
2465 break
2466
2467 # TODO: Add this for playlists
2468 info['availability'] = self._availability(
2469 is_private=is_private,
b28f8d24 2470 needs_premium=is_premium,
c224251a
M
2471 needs_subscription=is_membersonly,
2472 needs_auth=info['age_limit'] >= 18,
2473 is_unlisted=None if is_private is None else is_unlisted)
2474
06167fbb 2475 # get xsrf for annotations or comments
2476 get_annotations = self._downloader.params.get('writeannotations', False)
2477 get_comments = self._downloader.params.get('getcomments', False)
2478 if get_annotations or get_comments:
29f7c58a 2479 xsrf_token = None
545cc85d 2480 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2481 if ytcfg:
2482 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2483 if not xsrf_token:
2484 xsrf_token = self._search_regex(
2485 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2486 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2487
2488 # annotations
06167fbb 2489 if get_annotations:
64b6a4e9
RA
2490 invideo_url = try_get(
2491 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2492 if xsrf_token and invideo_url:
29f7c58a 2493 xsrf_field_name = None
2494 if ytcfg:
2495 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2496 if not xsrf_field_name:
2497 xsrf_field_name = self._search_regex(
2498 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2499 webpage, 'xsrf field name',
29f7c58a 2500 group='xsrf_field_name', default='session_token')
8a784c74 2501 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2502 self._proto_relative_url(invideo_url),
2503 video_id, note='Downloading annotations',
2504 errnote='Unable to download video annotations', fatal=False,
2505 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2506
277d6ff5 2507 if get_comments:
a1c5d2ca 2508 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2509
545cc85d 2510 self.mark_watched(video_id, player_response)
d77ab8e2 2511
545cc85d 2512 return info
c5e8d7af 2513
5f6a1245 2514
8bdd16b4 2515class YoutubeTabIE(YoutubeBaseInfoExtractor):
2516 IE_DESC = 'YouTube.com tab'
70d5c17b 2517 _VALID_URL = r'''(?x)
2518 https?://
2519 (?:\w+\.)?
2520 (?:
2521 youtube(?:kids)?\.com|
2522 invidio\.us
2523 )/
2524 (?:
2525 (?:channel|c|user)/|
2526 (?P<not_channel>
9ba5705a 2527 feed/|hashtag/|
70d5c17b 2528 (?:playlist|watch)\?.*?\blist=
2529 )|
29f7c58a 2530 (?!(?:%s)\b) # Direct URLs
70d5c17b 2531 )
2532 (?P<id>[^/?\#&]+)
2533 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2534 IE_NAME = 'youtube:tab'
2535
81127aa5 2536 _TESTS = [{
8bdd16b4 2537 # playlists, multipage
2538 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2539 'playlist_mincount': 94,
2540 'info_dict': {
2541 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2542 'title': 'Игорь Клейнер - Playlists',
2543 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2544 'uploader': 'Игорь Клейнер',
2545 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2546 },
2547 }, {
2548 # playlists, multipage, different order
2549 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2550 'playlist_mincount': 94,
2551 'info_dict': {
2552 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2553 'title': 'Игорь Клейнер - Playlists',
2554 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2555 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2556 'uploader': 'Игорь Клейнер',
8bdd16b4 2557 },
201c1459 2558 }, {
2559 # playlists, series
2560 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2561 'playlist_mincount': 5,
2562 'info_dict': {
2563 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2564 'title': '3Blue1Brown - Playlists',
2565 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2566 },
8bdd16b4 2567 }, {
2568 # playlists, singlepage
2569 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2570 'playlist_mincount': 4,
2571 'info_dict': {
2572 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2573 'title': 'ThirstForScience - Playlists',
2574 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2575 'uploader': 'ThirstForScience',
2576 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2577 }
2578 }, {
2579 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2580 'only_matching': True,
2581 }, {
2582 # basic, single video playlist
0e30a7b9 2583 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2584 'info_dict': {
0e30a7b9 2585 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2586 'uploader': 'Sergey M.',
2587 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2588 'title': 'youtube-dl public playlist',
81127aa5 2589 },
0e30a7b9 2590 'playlist_count': 1,
9291475f 2591 }, {
8bdd16b4 2592 # empty playlist
0e30a7b9 2593 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2594 'info_dict': {
0e30a7b9 2595 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2596 'uploader': 'Sergey M.',
2597 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2598 'title': 'youtube-dl empty playlist',
9291475f
PH
2599 },
2600 'playlist_count': 0,
2601 }, {
8bdd16b4 2602 # Home tab
2603 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2604 'info_dict': {
8bdd16b4 2605 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2606 'title': 'lex will - Home',
2607 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2608 'uploader': 'lex will',
2609 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2610 },
8bdd16b4 2611 'playlist_mincount': 2,
9291475f 2612 }, {
8bdd16b4 2613 # Videos tab
2614 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2615 'info_dict': {
8bdd16b4 2616 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2617 'title': 'lex will - Videos',
2618 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2619 'uploader': 'lex will',
2620 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2621 },
8bdd16b4 2622 'playlist_mincount': 975,
9291475f 2623 }, {
8bdd16b4 2624 # Videos tab, sorted by popular
2625 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2626 'info_dict': {
8bdd16b4 2627 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2628 'title': 'lex will - Videos',
2629 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2630 'uploader': 'lex will',
2631 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2632 },
8bdd16b4 2633 'playlist_mincount': 199,
9291475f 2634 }, {
8bdd16b4 2635 # Playlists tab
2636 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2637 'info_dict': {
8bdd16b4 2638 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2639 'title': 'lex will - Playlists',
2640 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2641 'uploader': 'lex will',
2642 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2643 },
8bdd16b4 2644 'playlist_mincount': 17,
ac7553d0 2645 }, {
8bdd16b4 2646 # Community tab
2647 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2648 'info_dict': {
8bdd16b4 2649 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2650 'title': 'lex will - Community',
2651 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2652 'uploader': 'lex will',
2653 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2654 },
2655 'playlist_mincount': 18,
87dadd45 2656 }, {
8bdd16b4 2657 # Channels tab
2658 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2659 'info_dict': {
8bdd16b4 2660 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2661 'title': 'lex will - Channels',
2662 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2663 'uploader': 'lex will',
2664 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2665 },
deaec5af 2666 'playlist_mincount': 12,
6b08cdf6 2667 }, {
a0566bbf 2668 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2669 'only_matching': True,
2670 }, {
a0566bbf 2671 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2672 'only_matching': True,
2673 }, {
a0566bbf 2674 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2675 'only_matching': True,
2676 }, {
2677 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2678 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2679 'info_dict': {
2680 'title': '29C3: Not my department',
2681 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2682 'uploader': 'Christiaan008',
2683 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2684 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2685 },
2686 'playlist_count': 96,
2687 }, {
2688 'note': 'Large playlist',
2689 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2690 'info_dict': {
8bdd16b4 2691 'title': 'Uploads from Cauchemar',
2692 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2693 'uploader': 'Cauchemar',
2694 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2695 },
8bdd16b4 2696 'playlist_mincount': 1123,
2697 }, {
2698 # even larger playlist, 8832 videos
2699 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2700 'only_matching': True,
4b7df0d3
JMF
2701 }, {
2702 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2703 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2704 'info_dict': {
acf757f4
PH
2705 'title': 'Uploads from Interstellar Movie',
2706 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2707 'uploader': 'Interstellar Movie',
8bdd16b4 2708 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2709 },
481cc733 2710 'playlist_mincount': 21,
358de58c 2711 }, {
2712 'note': 'Playlist with "show unavailable videos" button',
2713 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2714 'info_dict': {
2715 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2716 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2717 'uploader': 'Phim Siêu Nhân Nhật Bản',
2718 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2719 },
2720 'playlist_mincount': 1400,
2721 'expected_warnings': [
2722 'YouTube said: INFO - Unavailable videos are hidden',
2723 ]
5d342002 2724 }, {
2725 'note': 'Playlist with unavailable videos in a later page',
2726 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2727 'info_dict': {
2728 'title': 'Uploads from BlankTV',
2729 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2730 'uploader': 'BlankTV',
2731 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2732 },
2733 'playlist_mincount': 20000,
8bdd16b4 2734 }, {
2735 # https://github.com/ytdl-org/youtube-dl/issues/21844
2736 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2737 'info_dict': {
2738 'title': 'Data Analysis with Dr Mike Pound',
2739 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2740 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2741 'uploader': 'Computerphile',
deaec5af 2742 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2743 },
2744 'playlist_mincount': 11,
2745 }, {
a0566bbf 2746 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2747 'only_matching': True,
dacb3a86
S
2748 }, {
2749 # Playlist URL that does not actually serve a playlist
2750 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2751 'info_dict': {
2752 'id': 'FqZTN594JQw',
2753 'ext': 'webm',
2754 'title': "Smiley's People 01 detective, Adventure Series, Action",
2755 'uploader': 'STREEM',
2756 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2757 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2758 'upload_date': '20150526',
2759 'license': 'Standard YouTube License',
2760 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2761 'categories': ['People & Blogs'],
2762 'tags': list,
dbdaaa23 2763 'view_count': int,
dacb3a86
S
2764 'like_count': int,
2765 'dislike_count': int,
2766 },
2767 'params': {
2768 'skip_download': True,
2769 },
13a75688 2770 'skip': 'This video is not available.',
dacb3a86 2771 'add_ie': [YoutubeIE.ie_key()],
481cc733 2772 }, {
8bdd16b4 2773 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2774 'only_matching': True,
66b48727 2775 }, {
8bdd16b4 2776 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2777 'only_matching': True,
a0566bbf 2778 }, {
2779 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2780 'info_dict': {
2781 'id': '9Auq9mYxFEE',
2782 'ext': 'mp4',
deaec5af 2783 'title': compat_str,
a0566bbf 2784 'uploader': 'Sky News',
2785 'uploader_id': 'skynews',
2786 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2787 'upload_date': '20191102',
deaec5af 2788 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2789 'categories': ['News & Politics'],
2790 'tags': list,
2791 'like_count': int,
2792 'dislike_count': int,
2793 },
2794 'params': {
2795 'skip_download': True,
2796 },
2797 }, {
2798 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2799 'info_dict': {
2800 'id': 'a48o2S1cPoo',
2801 'ext': 'mp4',
2802 'title': 'The Young Turks - Live Main Show',
2803 'uploader': 'The Young Turks',
2804 'uploader_id': 'TheYoungTurks',
2805 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2806 'upload_date': '20150715',
2807 'license': 'Standard YouTube License',
2808 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2809 'categories': ['News & Politics'],
2810 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2811 'like_count': int,
2812 'dislike_count': int,
2813 },
2814 'params': {
2815 'skip_download': True,
2816 },
2817 'only_matching': True,
2818 }, {
2819 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2820 'only_matching': True,
2821 }, {
2822 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2823 'only_matching': True,
3d3dddc9 2824 }, {
2825 'url': 'https://www.youtube.com/feed/trending',
2826 'only_matching': True,
2827 }, {
2828 # needs auth
2829 'url': 'https://www.youtube.com/feed/library',
2830 'only_matching': True,
2831 }, {
2832 # needs auth
2833 'url': 'https://www.youtube.com/feed/history',
2834 'only_matching': True,
2835 }, {
2836 # needs auth
2837 'url': 'https://www.youtube.com/feed/subscriptions',
2838 'only_matching': True,
2839 }, {
2840 # needs auth
2841 'url': 'https://www.youtube.com/feed/watch_later',
2842 'only_matching': True,
2843 }, {
2844 # no longer available?
2845 'url': 'https://www.youtube.com/feed/recommended',
2846 'only_matching': True,
29f7c58a 2847 }, {
2848 # inline playlist with not always working continuations
2849 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2850 'only_matching': True,
2851 }, {
2852 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2853 'only_matching': True,
2854 }, {
2855 'url': 'https://www.youtube.com/course',
2856 'only_matching': True,
2857 }, {
2858 'url': 'https://www.youtube.com/zsecurity',
2859 'only_matching': True,
2860 }, {
2861 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2862 'only_matching': True,
2863 }, {
2864 'url': 'https://www.youtube.com/TheYoungTurks/live',
2865 'only_matching': True,
39ed931e 2866 }, {
2867 'url': 'https://www.youtube.com/hashtag/cctv9',
2868 'info_dict': {
2869 'id': 'cctv9',
2870 'title': '#cctv9',
2871 },
2872 'playlist_mincount': 350,
201c1459 2873 }, {
2874 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2875 'only_matching': True,
9297939e 2876 }, {
2877 # Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist.
2878 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2879 'only_matching': True
29f7c58a 2880 }]
2881
2882 @classmethod
2883 def suitable(cls, url):
2884 return False if YoutubeIE.suitable(url) else super(
2885 YoutubeTabIE, cls).suitable(url)
8bdd16b4 2886
2887 def _extract_channel_id(self, webpage):
2888 channel_id = self._html_search_meta(
2889 'channelId', webpage, 'channel id', default=None)
2890 if channel_id:
2891 return channel_id
2892 channel_url = self._html_search_meta(
2893 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2894 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2895 'twitter:app:url:googleplay'), webpage, 'channel url')
2896 return self._search_regex(
2897 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2898 channel_url, 'channel id')
15f6397c 2899
8bdd16b4 2900 @staticmethod
cd7c66cf 2901 def _extract_basic_item_renderer(item):
2902 # Modified from _extract_grid_item_renderer
201c1459 2903 known_basic_renderers = (
2904 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 2905 )
2906 for key, renderer in item.items():
201c1459 2907 if not isinstance(renderer, dict):
cd7c66cf 2908 continue
201c1459 2909 elif key in known_basic_renderers:
2910 return renderer
2911 elif key.startswith('grid') and key.endswith('Renderer'):
2912 return renderer
8bdd16b4 2913
def _grid_entries(self, grid_renderer):
    """Yield one result per recognised item of ``grid_renderer['items']``.

    Each dict item is reduced to its renderer with
    ``_extract_basic_item_renderer`` and then dispatched on the id it
    carries:

    * ``playlistId`` -> url result pointing at the playlist page
    * ``videoId``    -> whatever ``self._extract_video`` returns
    * ``channelId``  -> url result pointing at the channel page
    * otherwise      -> the renderer's navigation endpoint URL, handed to
      the first of YoutubeTabIE / YoutubePlaylistIE / YoutubeIE that
      reports it as suitable

    Items that are not dicts, or that hold no recognised renderer, are
    skipped.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        # Title may be given either as text runs or as a simple string
        title = try_get(
            renderer, (lambda x: x['title']['runs'][0]['text'],
                       lambda x: x['title']['simpleText']), compat_str)
        # playlist
        playlist_id = renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
            continue
        # video
        video_id = renderer.get('videoId')
        if video_id:
            yield self._extract_video(renderer)
            continue
        # channel
        channel_id = renderer.get('channelId')
        if channel_id:
            # Reuse the title computed above: it already covers
            # 'simpleText', and re-reading only 'simpleText' here would
            # throw away a title that was supplied as runs.
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
            continue
        # generic endpoint URL support
        ep_url = urljoin('https://www.youtube.com/', try_get(
            renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str))
        if ep_url:
            for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if ie.suitable(ep_url):
                    yield self.url_result(
                        ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                    break
8bdd16b4 2956
3d3dddc9 2957 def _shelf_entries_from_content(self, shelf_renderer):
2958 content = shelf_renderer.get('content')
2959 if not isinstance(content, dict):
8bdd16b4 2960 return
cd7c66cf 2961 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 2962 if renderer:
2963 # TODO: add support for nested playlists so each shelf is processed
2964 # as separate playlist
2965 # TODO: this includes only first N items
2966 for entry in self._grid_entries(renderer):
2967 yield entry
2968 renderer = content.get('horizontalListRenderer')
2969 if renderer:
2970 # TODO
2971 pass
8bdd16b4 2972
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries of one shelf.

    If the shelf exposes an endpoint URL, a url result for it (titled with
    the shelf's first title run) is yielded first.  Entries embedded in
    the shelf content are yielded afterwards in every case.  With
    *skip_channels* set, a shelf whose URL contains ``/channels?`` yields
    nothing at all.
    """
    shelf_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', shelf_path)
    # Skip links to other channels.  Note that checking for
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return
    if shelf_url:
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may carry no URL, so its inline content is walked as well
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 2990
8bdd16b4 2991 def _playlist_entries(self, video_list_renderer):
2992 for content in video_list_renderer['contents']:
2993 if not isinstance(content, dict):
2994 continue
2995 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2996 if not isinstance(renderer, dict):
2997 continue
2998 video_id = renderer.get('videoId')
2999 if not video_id:
3000 continue
3001 yield self._extract_video(renderer)
07aeced6 3002
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held at ``content.videoRenderer``, if it has an id.

    Nothing is yielded when that renderer is missing, is not a dict, or
    has no ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3010
8bdd16b4 3011 def _video_entry(self, video_renderer):
3012 video_id = video_renderer.get('videoId')
3013 if video_id:
3014 return self._extract_video(video_renderer)
dacb3a86 3015
def _post_thread_entries(self, post_thread_renderer):
    """Yield everything playable referenced by one community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every link in the post text that YoutubeIE accepts.  A text link
    to the same video as the attachment is not yielded again.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return
    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry
    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # Already yielded above as the post's video attachment
        if video_id == ep_video_id:
            continue
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 3051
8bdd16b4 3052 def _post_thread_continuation_entries(self, post_thread_continuation):
3053 contents = post_thread_continuation.get('contents')
3054 if not isinstance(contents, list):
3055 return
3056 for content in contents:
3057 renderer = content.get('backstagePostThreadRenderer')
3058 if not isinstance(renderer, dict):
3059 continue
3060 for entry in self._post_thread_entries(renderer):
3061 yield entry
07aeced6 3062
39ed931e 3063 r''' # unused
3064 def _rich_grid_entries(self, contents):
3065 for content in contents:
3066 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3067 if video_renderer:
3068 entry = self._video_entry(video_renderer)
3069 if entry:
3070 yield entry
3071 '''
3072
29f7c58a 3073 @staticmethod
3074 def _build_continuation_query(continuation, ctp=None):
3075 query = {
3076 'ctoken': continuation,
3077 'continuation': continuation,
3078 }
3079 if ctp:
3080 query['itct'] = ctp
3081 return query
3082
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from ``continuations[0].nextContinuationData``.

    Returns None when that dict is absent or holds no ``continuation``
    token.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = data.get('continuation') if data else None
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, data.get('clickTrackingParams'))
c5e8d7af 3094
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    ``nextContinuationData`` is preferred.  Failing that, the elements of
    the renderer's ``contents`` and ``items`` lists are scanned for the
    first ``continuationItemRenderer`` whose endpoint carries a
    ``continuationCommand`` token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    candidates = []
    for key in ('contents', 'items'):
        candidates.extend(try_get(renderer, lambda x: x[key], list) or [])
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 3117
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield the entries of ``tab``, then those of its continuation pages.

    The content already embedded in ``tab`` is processed first.  Further
    pages are requested through ``_extract_response`` for as long as a
    continuation is found in the data that was just processed.
    """
    # One-element list used as a mutable cell shared with the nested
    # generator below (Python 2 does not support nonlocal).
    continuation_list = [None]

    def extract_entries(parent_renderer):
        """Yield entries below ``parent_renderer`` and record its continuation.

        This is called again on continuation responses so that
        continuation works with feeds.
        """
        section_handlers = {
            'playlistVideoListRenderer': self._playlist_entries,
            'gridRenderer': self._grid_entries,
            'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
            'backstagePostThreadRenderer': self._post_thread_entries,
            'videoRenderer': lambda x: [self._video_entry(x)],
        }
        for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(content, dict):
                continue
            section = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if section:
                for section_item in try_get(section, lambda x: x['contents'], list) or []:
                    if not isinstance(section_item, dict):
                        continue
                    for key, renderer in section_item.items():
                        handler = section_handlers.get(key)
                        if handler is None:
                            continue
                        for entry in handler(renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        # Only the first known renderer of an item is used
                        break

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(section)
            else:
                rich_item = content.get('richItemRenderer')
                if rich_item:
                    for entry in self._rich_entries(rich_item):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry

    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    # Handlers for responses carrying 'continuationContents'
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for responses carrying appended continuation items, paired
    # with the key under which the items are handed to the handler
    appended_item_handlers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query={
                'continuation': continuation['continuation'],
                'clickTracking': {'clickTrackingParams': continuation['itct']}
            },
            headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Prefer the visitor data of the latest response when it has one
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in continuation_handlers:
                continue
            continuation_renderer = value
            continuation_list[0] = None
            for entry in continuation_handlers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        on_response_received = dict_get(
            response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received,
            lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key in first_item:
            if key not in appended_item_handlers:
                continue
            handler, items_key = appended_item_handlers[key]
            video_items_renderer = {items_key: continuation_items}
            continuation_list[0] = None
            for entry in handler(video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognizable in this response
        break
9558dcec 3237
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` of the first tab flagged as selected.

    Raises ExtractorError when no tab in ``tabs`` is selected.
    """
    for candidate in tabs:
        is_selected = try_get(candidate, lambda x: x['tabRenderer']['selected'], bool)
        if is_selected:
            return candidate['tabRenderer']
    raise ExtractorError('Unable to find selected tab')
b82f815f 3245
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar found in ``data``.

    The returned dict may hold ``uploader``, ``uploader_id`` and
    ``uploader_url``; keys whose value is None are left out.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        canonical_base_url = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        info['uploader'] = owner.get('text')
        info['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        info['uploader_url'] = urljoin('https://www.youtube.com/', canonical_base_url)
    return {key: value for key, value in info.items() if value is not None}
8bdd16b4 3268
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a page that is rendered with tabs.

    Metadata is read from the channel metadata renderer or, failing that,
    from the playlist metadata renderer of ``data``.  The entries come
    from the selected tab.
    """
    channel_name = channel_url = channel_id = None
    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []

    selected_tab = self._extract_selected_tab(tabs)

    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()
        # Channel avatar first, then the thumbnail of the playlist sidebar
        raw_thumbnails = (
            try_get(metadata_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata: fall back to the sidebar's uploader fields
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the uploader fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(data)
    ytcfg = self._extract_ytcfg(item_id, webpage)
    entries = self._entries(selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3340
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages as needed.

    Each page is read from ``playlist``; the following page is requested
    from the 'next' endpoint, starting from the last video seen.  Iteration
    stops when a page has no videos, when a page brings nothing after the
    last video already yielded, when the first video shows up again, or
    when the response carries no playlist.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last panel entry may lack a watch endpoint; fall back to an
        # empty dict so that the defaults below are used instead of failing
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Nothing left to read entries from
            return
cd7c66cf 3379
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a playlist found on a watch page.

    Everything except mix playlists is delegated to the regular
    tab-based playlist URL; mixes are extracted here.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)

    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)

    if not playlist_url or playlist_url == url:
        # No separate playlist page to delegate to
        mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
        return self.playlist_result(
            mix_entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3397
f3eaa8dd
M
def _extract_alerts(self, data, expected=False):
    """Report the alerts found in ``data`` and raise on an error alert.

    Every non-error alert, and every error alert except the last one, is
    passed to ``report_warning``.  The last error alert is raised as an
    ExtractorError whose ``expected`` flag is the ``expected`` argument.
    """

    def _real_extract_alerts():
        """Yield one (type, message) pair per alert that has both."""
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                # The text is either a plain 'simpleText' or a list of
                # 'runs'; both are merged so each alert is reported once.
                message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
                runs = try_get(alert, lambda x: x['text']['runs'], list) or []
                # Runs without a string 'text' contribute nothing
                message += ''.join(
                    try_get(run, lambda x: x['text'], compat_str) or '' for run in runs)
                if message:
                    yield alert_type, message

    errors = []
    warnings = []
    for alert_type, alert_message in _real_extract_alerts():
        if alert_type.lower() == 'error':
            errors.append([alert_type, alert_message])
        else:
            warnings.append([alert_type, alert_message])

    # Only the last error is raised; earlier ones are downgraded to warnings
    for alert_type, alert_message in (warnings + errors[:-1]):
        self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if errors:
        raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3428
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when ``data`` has no playlist sidebar.  Otherwise the
    API response is returned; default ``params`` and ``browseId`` values
    are used when the button was not found.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return

    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        primary_info = sidebar_item.get('playlistSidebarPrimaryInfoRenderer')
        menu_items = try_get(
            primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if not isinstance(menu_item, dict):
                continue
            nav_item = menu_item.get('menuNavigationItemRenderer')
            label = try_get(nav_item, lambda x: x['text']['simpleText'], compat_str)
            if label and label.lower() == 'show unavailable videos':
                browse_endpoint = try_get(
                    nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
                browse_id = browse_endpoint.get('browseId')
                params = browse_endpoint.get('params')
                break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    account_syncid = self._extract_account_syncid(ytcfg)
    identity_token = self._extract_identity_token(webpage, item_id=item_id)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=account_syncid,
        identity_token=identity_token,
        visitor_data=visitor_data)
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3472
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint ``ep`` and return its response, retrying on failure.

    A request is retried, up to the 'extractor_retries' parameter, on
    HTTP errors 500, 503 and 404, and when the response has none of
    ``check_get_keys``.  When a request finally fails, the error is raised
    if ``fatal`` is true; otherwise a warning is reported and None is
    returned.
    """
    retries = self._downloader.params.get('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    response = None
    last_error = None
    attempt = -1
    while attempt < retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, retry_suffix))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if attempt < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return
        else:
            # Youtube may send alerts if there was an issue with the continuation page
            self._extract_alerts(response, expected=False)
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if attempt >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                return
    return response
3520
def _extract_webpage(self, url, item_id):
    """Download ``url`` and return ``(webpage, data)``.

    ``data`` is the initial data extracted from the webpage.  The download
    is repeated, up to the 'extractor_retries' parameter, while the data
    has neither 'contents' nor 'currentVideoEndpoint'; ExtractorError is
    raised once the retries are used up.
    """
    last_error = 'Incomplete yt initial data recieved'
    retries = self._downloader.params.get('extractor_retries', 3)
    attempt = -1
    while attempt < retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if attempt:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        webpage = self._download_webpage(
            url, item_id, 'Downloading webpage%s' % retry_suffix)
        data = self._extract_yt_initial_data(item_id, webpage)
        self._extract_alerts(data, expected=True)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        if attempt >= retries:
            raise ExtractorError(last_error)
    return webpage, data
3541
@staticmethod
def _smuggle_data(entries, data):
    """Yield each entry, with ``data`` smuggled into its URL when ``data`` is truthy."""
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
3548
def _real_extract(self, url):
    """Extract ``url`` and pass the smuggled data on to the resulting entries."""
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
3557
def __real_extract(self, url):
    """Extract a tab, playlist or single-video result for ``url``.

    The page is handled, in this order, as a tab-based page, as a watch
    page carrying a playlist, or as a single video.  ExtractorError is
    raised when none of these is recognized.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self._downloader.params.get('compat_opts', [])

    # This is not matched in a channel page with a tab selected
    match = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    mobj = match.groupdict() if match else {}
    redirect_allowed = 'no-youtube-channel-redirect' not in compat_opts
    if mobj and not mobj.get('not_channel') and redirect_allowed:
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (mobj.get('pre'), mobj.get('post') or '')

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    is_watch_url = (mobj.get('not_channel') or '').startswith('watch')
    if is_watch_url and not video_id:
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    current_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str)
    video_id = current_video_id or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 3616
c5e8d7af 3617
class YoutubePlaylistIE(InfoExtractor):
    """Match playlist IDs and playlist URLs and hand them over to YoutubeTabIE."""
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts and URLs carrying a video ID."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite ``url`` as a /playlist URL and delegate it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = self.is_music_url(url)
        # Keep the original query when there is one; a bare ID has none
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 3700
3701
class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be links that carry a playlist into watch URLs for YoutubeTabIE."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL and delegate it to YoutubeTabIE."""
        match = re.match(self._VALID_URL, url)
        playlist_id = match.group('playlist_id')
        query = {
            'v': match.group('id'),
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', query)
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3740
3741
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the "ytuser:" keyword to the user's page URL."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the user page URL to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3755
b05654f0 3756
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ":ytfav" keyword to the 'LL' playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the 'LL' playlist URL to YoutubeTabIE."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
3774
3775
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor for the "ytsearch" keyword."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` video results for ``query``, page by page."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        found = 0
        for page_num in itertools.count(1):
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # The first page and the continuation pages keep the section
            # list under different paths
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for section in sections:
                if continuation_token is None:
                    continuation_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                section_items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list) or []
                for section_item in section_items:
                    video = section_item.get('videoRenderer') if isinstance(section_item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    found += 1
                    if found == n:
                        return

            if not continuation_token:
                break
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3845
c9ae7b95 3846
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE for the "ytsearchdate" keyword."""
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' field of the search query
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3852
c9ae7b95 3853
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Search extractor for /results URLs."""
    IE_NAME = '%s_url' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return ``_VALID_URL`` unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the search terms and the 'sp' parameter from ``url`` and run the search."""
        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = qs.get('search_query') or qs.get('q')
        query = search_terms[0]
        # Stored on the instance, where _entries reads it
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3879
3880
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    A subclass has to provide the _FEED_NAME property.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name, built from the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in when the extractor is initialized."""
        self._login()

    def _real_extract(self, url):
        """Delegate the feed's URL to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3900
3901
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ":ytwatchlater" keyword to the 'WL' playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the 'WL' playlist URL to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3914
3915
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3930
1ed5b5c9 3931
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3943
1ed5b5c9 3944
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3953
3954
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their video ID and explain the likely cause.

    The pattern matches youtube.com watch/attribution_link URLs that end
    right after a single non-video query parameter (or after "watch?" with
    nothing at all). The error message attributes this to an unquoted "&"
    in the shell. Extraction always fails with an expected ExtractorError.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with quoting advice.

        Raises:
            ExtractorError: unconditionally; marked expected=True.
        """
        # The suggested commands name this project's executable (yt-dlp),
        # not the upstream youtube-dl it was forked from.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  yt-dlp BaW_jenozKc  .',
            expected=True)
772fd5cc
PH
4002
4003
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose "v" parameter holds only 1-10 ID characters."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the partial ID and URL."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)