]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[mildom] Remove proxy (#260)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Return the parsed query-string parameters of *url*.

    The query component is isolated with ``compat_urlparse.urlparse`` and
    then decoded with ``compat_urlparse.parse_qs``.
    """
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'movies|results|shared|hashtag|trending|feed|feeds|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _ids_to_results(self, ids):
        """Wrap each video id in *ids* in a url_result handled by the 'Youtube' extractor."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.

        Returns True if login succeeded or was skipped because no username
        was supplied, and False if any step of the login flow failed.

        Raises ExtractorError if _LOGIN_REQUIRED is set and neither login
        info nor a cookie file was provided.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # TODO: when a cookiefile is given, remind users to keep their cookies up to date
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Report failure explicitly, as the docstring promises
            return False

        login_form = self._hidden_inputs(login_page)

        # Continuation URL embedded in both the lookup and the challenge request
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        def req(url, f_req, note, errnote):
            """POST the hidden login form plus *f_req* to *url*; returns the parsed JSON, or False on failure."""
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything in front of the first '[' so the remainder parses as JSON
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        warn = self.report_warning

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Resolve the message first: '%' binds tighter than a conditional
            # expression, so formatting inline would drop the prefix.
            if login_msg == 'INCORRECT_ANSWER_ENTERED':
                login_msg = 'Invalid password'
            warn('Unable to login: %s' % (login_msg or 'unknown error'))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence pitfall as for the password message above
                    if tfa_msg == 'INCORRECT_ANSWER_ENTERED':
                        tfa_msg = 'Invalid TFA code'
                    warn('Unable to finish TFA: %s' % (tfa_msg or 'unknown error'))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _initialize_consent(self):
        """Set an accepting CONSENT cookie for .youtube.com unless one is already present.

        Nothing is done when a '__Secure-3PSID' cookie exists or when the
        current CONSENT cookie already contains 'YES'. The numeric id of a
        'PENDING+<id>' consent cookie is reused if found; otherwise a random
        three-digit id is generated.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Prepare the consent cookie and, when a downloader is attached, attempt login."""
        self._initialize_consent()
        if self._downloader is None:
            return
        # The result is intentionally ignored: a failed login has already
        # been reported through warnings inside _login().
        self._login()

    _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
    _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """Build the 'SAPISIDHASH <time>_<sha1>' Authorization value from the SAPISID cookie.

        Returns None when no SAPISID cookie is set for https://www.youtube.com.
        """
        sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        # SHA-1 over "<timestamp> <SAPISID> <origin>"
        sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
        return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None):
        """POST *query* as JSON to the youtubei/v1 endpoint *ep* and return the parsed response.

        @param ep        endpoint name appended to https://www.youtube.com/youtubei/v1/
        @param query     dict merged into the request body next to 'context'
        @param headers   optional extra headers; they override the generated ones
        @param context   client context; falls back to _extract_context() when falsy
        @param api_key   API key; falls back to _extract_api_key() when falsy
        """
        data = {'context': context or self._extract_context()}
        data.update(query)
        real_headers = self._generate_api_headers()
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def _extract_api_key(self, ytcfg=None):
        """Return INNERTUBE_API_KEY from *ytcfg*, or the built-in default key."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY

    def _extract_yt_initial_data(self, video_id, webpage):
        """Locate the ytInitialData JSON object in *webpage* and parse it.

        Two patterns are handed to _search_regex: the data regex followed by
        the boundary regex, and the data regex on its own.
        """
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return the ID_TOKEN from the page's ytcfg, else from a regex over *webpage*, else None."""
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(data):
        """
        Extract syncId required to download private playlists of secondary channels
        @param data Either response or ytcfg
        @returns    the channel sync id, the DELEGATED_SESSION_ID entry of
                    *data*, or None when neither is available
        """
        sync_ids = (try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]
        # ytcfg includes channel_syncid if on secondary channel.
        # Guard against None / non-dict input, which try_get above tolerates.
        return data.get('DELEGATED_SESSION_ID') if isinstance(data, dict) else None

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the object passed to ytcfg.set(...) in *webpage*; returns {} when absent or unparsable."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def __extract_client_version(self, ytcfg):
        """Return INNERTUBE_CLIENT_VERSION from *ytcfg*, or the built-in web client version."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION

    def _extract_context(self, ytcfg=None):
        """Return the INNERTUBE_CONTEXT dict from *ytcfg*, or rebuild a minimal client context."""
        context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
        if context:
            return context

        # Recreate the client context (required)
        client_version = self.__extract_client_version(ytcfg)
        client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
        context = {
            'client': {
                'clientName': client_name,
                'clientVersion': client_version,
            }
        }
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
        """Assemble the HTTP headers for an API request.

        Client name and version are always set. The identity token, page id
        (with auth user 0), visitor id and SAPISIDHASH authorization headers
        are added only when the corresponding value is available.
        """
        headers = {
            'X-YouTube-Client-Name': '1',
            'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
            headers['X-Goog-AuthUser'] = 0
        if visitor_data:
            headers['x-goog-visitor-id'] = visitor_data
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = 'https://www.youtube.com'
        return headers

    def _extract_video(self, renderer):
        """Turn a video renderer dict into a 'url'-type result for YoutubeIE.

        Title, description, duration, view count and uploader are read
        best-effort from *renderer*; a field that is missing yields None.
        """
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Strip all whitespace, then take the leading run of digits and commas
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
437
0c148415 438
360e1ca5 439class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 440 IE_DESC = 'YouTube.com'
bc2ca1bb 441 _INVIDIOUS_SITES = (
442 # invidious-redirect websites
443 r'(?:www\.)?redirect\.invidious\.io',
444 r'(?:(?:www|dev)\.)?invidio\.us',
445 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
446 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 447 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 448 r'(?:(?:www|au)\.)?ytprivate\.com',
449 r'(?:www\.)?invidious\.namazso\.eu',
450 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 451 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
452 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
453 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
454 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
455 # youtube-dl invidious instances list
456 r'(?:(?:www|no)\.)?invidiou\.sh',
457 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
458 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 459 r'(?:www\.)?invidious\.mastodon\.host',
460 r'(?:www\.)?invidious\.zapashcanon\.fr',
461 r'(?:www\.)?invidious\.kavin\.rocks',
201c1459 462 r'(?:www\.)?invidious\.tinfoil-hat\.net',
463 r'(?:www\.)?invidious\.himiko\.cloud',
464 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 465 r'(?:www\.)?invidious\.tube',
466 r'(?:www\.)?invidiou\.site',
467 r'(?:www\.)?invidious\.site',
468 r'(?:www\.)?invidious\.xyz',
469 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 470 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 471 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 472 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 473 r'(?:www\.)?tube\.poal\.co',
474 r'(?:www\.)?tube\.connect\.cafe',
475 r'(?:www\.)?vid\.wxzm\.sx',
476 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 477 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 478 r'(?:www\.)?yewtu\.be',
479 r'(?:www\.)?yt\.elukerio\.org',
480 r'(?:www\.)?yt\.lelux\.fi',
481 r'(?:www\.)?invidious\.ggc-project\.de',
482 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 483 r'(?:www\.)?ytprivate\.com',
484 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 485 r'(?:www\.)?invidious\.toot\.koeln',
486 r'(?:www\.)?invidious\.fdn\.fr',
487 r'(?:www\.)?watch\.nettohikari\.com',
488 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
489 r'(?:www\.)?qklhadlycap4cnod\.onion',
490 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
491 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
492 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
493 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
494 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
495 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
496 )
cb7dfeea 497 _VALID_URL = r"""(?x)^
c5e8d7af 498 (
edb53e2d 499 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 500 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
501 (?:www\.)?deturl\.com/www\.youtube\.com|
502 (?:www\.)?pwnyoutube\.com|
503 (?:www\.)?hooktube\.com|
504 (?:www\.)?yourepeat\.com|
505 tube\.majestyc\.net|
506 %(invidious)s|
507 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
508 (?:.*?\#/)? # handle anchor (#/) redirect urls
509 (?: # the various things that can precede the ID:
ac7553d0 510 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 511 |(?: # or the v= param in all its forms
f7000f3a 512 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 513 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 514 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
515 v=
516 )
f4b05232 517 ))
cbaed4bb
S
518 |(?:
519 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
520 vid\.plus| # or vid.plus/xxxx
521 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 522 %(invidious)s
cbaed4bb 523 )/
edb53e2d 524 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 525 )
c5e8d7af 526 )? # all until now is optional -> you can pass the naked ID
201c1459 527 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 528 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 529 $""" % {
bc2ca1bb 530 'invidious': '|'.join(_INVIDIOUS_SITES),
531 }
e40c758c 532 _PLAYER_INFO_RE = (
cc2db878 533 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
534 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 535 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 536 )
2c62dc26 537 _formats = {
c2d3cb4c 538 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
539 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
540 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
541 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
542 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
543 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
544 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
545 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 546 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 547 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
548 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
549 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
550 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
551 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
552 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 553 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 554 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
555 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 556
557
558 # 3D videos
c2d3cb4c 559 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
560 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
561 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
562 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 563 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
564 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
565 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 566
96fb5605 567 # Apple HTTP Live Streaming
11f12195 568 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 569 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
570 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
571 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
572 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
573 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 574 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
575 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
576
577 # DASH mp4 video
d23028a8
S
578 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
579 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
580 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
581 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
582 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 583 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
584 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
585 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
586 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
587 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
588 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
589 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 590
f6f1fc92 591 # Dash mp4 audio
d23028a8
S
592 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
593 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
594 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
595 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
596 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
597 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
598 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
599
600 # Dash webm
d23028a8
S
601 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
602 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
603 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
604 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
605 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
606 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
607 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
608 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
609 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
610 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
611 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
612 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
613 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
614 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
615 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 616 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
617 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
618 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
619 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
620 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
621 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
622 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
623
624 # Dash webm audio
d23028a8
S
625 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
626 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 627
0857baad 628 # Dash webm audio with opus inside
d23028a8
S
629 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
630 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
631 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 632
ce6b9a2d
PH
633 # RTMP (unnamed)
634 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
635
636 # av01 video only formats sometimes served with "unknown" codecs
637 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
638 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
639 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
640 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 641 }
29f7c58a 642 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 643
fd5c4aab
S
644 _GEO_BYPASS = False
645
78caa52a 646 IE_NAME = 'youtube'
2eb88d95
PH
647 _TESTS = [
648 {
2d3d2997 649 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
650 'info_dict': {
651 'id': 'BaW_jenozKc',
652 'ext': 'mp4',
3867038a 653 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
654 'uploader': 'Philipp Hagemeister',
655 'uploader_id': 'phihag',
ec85ded8 656 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
657 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
658 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 659 'upload_date': '20121002',
3867038a 660 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 661 'categories': ['Science & Technology'],
3867038a 662 'tags': ['youtube-dl'],
556dbe7f 663 'duration': 10,
dbdaaa23 664 'view_count': int,
3e7c1224
PH
665 'like_count': int,
666 'dislike_count': int,
7c80519c 667 'start_time': 1,
297a564b 668 'end_time': 9,
2eb88d95 669 }
0e853ca4 670 },
fccd3771 671 {
4bc3a23e
PH
672 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
673 'note': 'Embed-only video (#1746)',
674 'info_dict': {
675 'id': 'yZIXLfi8CZQ',
676 'ext': 'mp4',
677 'upload_date': '20120608',
678 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
679 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
680 'uploader': 'SET India',
94bfcd23 681 'uploader_id': 'setindia',
ec85ded8 682 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 683 'age_limit': 18,
545cc85d 684 },
685 'skip': 'Private video',
fccd3771 686 },
11b56058 687 {
8bdd16b4 688 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
689 'note': 'Use the first video ID in the URL',
690 'info_dict': {
691 'id': 'BaW_jenozKc',
692 'ext': 'mp4',
3867038a 693 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
694 'uploader': 'Philipp Hagemeister',
695 'uploader_id': 'phihag',
ec85ded8 696 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 697 'upload_date': '20121002',
3867038a 698 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 699 'categories': ['Science & Technology'],
3867038a 700 'tags': ['youtube-dl'],
556dbe7f 701 'duration': 10,
dbdaaa23 702 'view_count': int,
11b56058
PM
703 'like_count': int,
704 'dislike_count': int,
34a7de29
S
705 },
706 'params': {
707 'skip_download': True,
708 },
11b56058 709 },
dd27fd17 710 {
2d3d2997 711 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
712 'note': '256k DASH audio (format 141) via DASH manifest',
713 'info_dict': {
714 'id': 'a9LDPn-MO4I',
715 'ext': 'm4a',
716 'upload_date': '20121002',
717 'uploader_id': '8KVIDEO',
ec85ded8 718 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
719 'description': '',
720 'uploader': '8KVIDEO',
721 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 722 },
4bc3a23e
PH
723 'params': {
724 'youtube_include_dash_manifest': True,
725 'format': '141',
4919603f 726 },
de3c7fe0 727 'skip': 'format 141 not served anymore',
dd27fd17 728 },
8bdd16b4 729 # DASH manifest with encrypted signature
730 {
731 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
732 'info_dict': {
733 'id': 'IB3lcPjvWLA',
734 'ext': 'm4a',
735 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
736 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
737 'duration': 244,
738 'uploader': 'AfrojackVEVO',
739 'uploader_id': 'AfrojackVEVO',
740 'upload_date': '20131011',
cc2db878 741 'abr': 129.495,
8bdd16b4 742 },
743 'params': {
744 'youtube_include_dash_manifest': True,
745 'format': '141/bestaudio[ext=m4a]',
746 },
747 },
aa79ac0c
PH
748 # Controversy video
749 {
750 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
751 'info_dict': {
752 'id': 'T4XJQO3qol8',
753 'ext': 'mp4',
556dbe7f 754 'duration': 219,
aa79ac0c 755 'upload_date': '20100909',
4fe54c12 756 'uploader': 'Amazing Atheist',
aa79ac0c 757 'uploader_id': 'TheAmazingAtheist',
ec85ded8 758 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 759 'title': 'Burning Everyone\'s Koran',
545cc85d 760 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 761 }
c522adb1 762 },
dd2d55f1 763 # Normal age-gate video (embed allowed)
c522adb1 764 {
2d3d2997 765 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
766 'info_dict': {
767 'id': 'HtVdAasjOgU',
768 'ext': 'mp4',
769 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 770 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 771 'duration': 142,
c522adb1
JMF
772 'uploader': 'The Witcher',
773 'uploader_id': 'WitcherGame',
ec85ded8 774 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 775 'upload_date': '20140605',
34952f09 776 'age_limit': 18,
c522adb1
JMF
777 },
778 },
8bdd16b4 779 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
780 # YouTube Red ad is not captured for creator
781 {
782 'url': '__2ABJjxzNo',
783 'info_dict': {
784 'id': '__2ABJjxzNo',
785 'ext': 'mp4',
786 'duration': 266,
787 'upload_date': '20100430',
788 'uploader_id': 'deadmau5',
789 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 790 'creator': 'deadmau5',
791 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 792 'uploader': 'deadmau5',
793 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 794 'alt_title': 'Some Chords',
8bdd16b4 795 },
796 'expected_warnings': [
797 'DASH manifest missing',
798 ]
799 },
067aa17e 800 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
801 {
802 'url': 'lqQg6PlCWgI',
803 'info_dict': {
804 'id': 'lqQg6PlCWgI',
805 'ext': 'mp4',
556dbe7f 806 'duration': 6085,
90227264 807 'upload_date': '20150827',
cbe2bd91 808 'uploader_id': 'olympic',
ec85ded8 809 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 810 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 811 'uploader': 'Olympic',
cbe2bd91
PH
812 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
813 },
814 'params': {
815 'skip_download': 'requires avconv',
e52a40ab 816 }
cbe2bd91 817 },
6271f1ca
PH
818 # Non-square pixels
819 {
820 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
821 'info_dict': {
822 'id': '_b-2C3KPAM0',
823 'ext': 'mp4',
824 'stretched_ratio': 16 / 9.,
556dbe7f 825 'duration': 85,
6271f1ca
PH
826 'upload_date': '20110310',
827 'uploader_id': 'AllenMeow',
ec85ded8 828 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 829 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 830 'uploader': '孫ᄋᄅ',
6271f1ca
PH
831 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
832 },
06b491eb
S
833 },
834 # url_encoded_fmt_stream_map is empty string
835 {
836 'url': 'qEJwOuvDf7I',
837 'info_dict': {
838 'id': 'qEJwOuvDf7I',
f57b7835 839 'ext': 'webm',
06b491eb
S
840 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
841 'description': '',
842 'upload_date': '20150404',
843 'uploader_id': 'spbelect',
844 'uploader': 'Наблюдатели Петербурга',
845 },
846 'params': {
847 'skip_download': 'requires avconv',
e323cf3f
S
848 },
849 'skip': 'This live event has ended.',
06b491eb 850 },
067aa17e 851 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
852 {
853 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
854 'info_dict': {
855 'id': 'FIl7x6_3R5Y',
eb6793ba 856 'ext': 'webm',
da77d856
S
857 'title': 'md5:7b81415841e02ecd4313668cde88737a',
858 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 859 'duration': 220,
da77d856
S
860 'upload_date': '20150625',
861 'uploader_id': 'dorappi2000',
ec85ded8 862 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 863 'uploader': 'dorappi2000',
eb6793ba 864 'formats': 'mincount:31',
da77d856 865 },
eb6793ba 866 'skip': 'not actual anymore',
2ee8f5d8 867 },
8a1a26ce
YCH
868 # DASH manifest with segment_list
869 {
870 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
871 'md5': '8ce563a1d667b599d21064e982ab9e31',
872 'info_dict': {
873 'id': 'CsmdDsKjzN8',
874 'ext': 'mp4',
17ee98e1 875 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
876 'uploader': 'Airtek',
877 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
878 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
879 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
880 },
881 'params': {
882 'youtube_include_dash_manifest': True,
883 'format': '135', # bestvideo
be49068d
S
884 },
885 'skip': 'This live event has ended.',
2ee8f5d8 886 },
cf7e015f
S
887 {
888 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 889 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 890 'info_dict': {
545cc85d 891 'id': 'jvGDaLqkpTg',
892 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
893 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
894 },
895 'playlist': [{
896 'info_dict': {
545cc85d 897 'id': 'jvGDaLqkpTg',
cf7e015f 898 'ext': 'mp4',
545cc85d 899 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
900 'description': 'md5:e03b909557865076822aa169218d6a5d',
901 'duration': 10643,
902 'upload_date': '20161111',
903 'uploader': 'Team PGP',
904 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
905 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
906 },
907 }, {
908 'info_dict': {
545cc85d 909 'id': '3AKt1R1aDnw',
cf7e015f 910 'ext': 'mp4',
545cc85d 911 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
912 'description': 'md5:e03b909557865076822aa169218d6a5d',
913 'duration': 10991,
914 'upload_date': '20161111',
915 'uploader': 'Team PGP',
916 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
917 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
918 },
919 }, {
920 'info_dict': {
545cc85d 921 'id': 'RtAMM00gpVc',
cf7e015f 922 'ext': 'mp4',
545cc85d 923 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
924 'description': 'md5:e03b909557865076822aa169218d6a5d',
925 'duration': 10995,
926 'upload_date': '20161111',
927 'uploader': 'Team PGP',
928 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
929 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
930 },
931 }, {
932 'info_dict': {
545cc85d 933 'id': '6N2fdlP3C5U',
cf7e015f 934 'ext': 'mp4',
545cc85d 935 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
936 'description': 'md5:e03b909557865076822aa169218d6a5d',
937 'duration': 10990,
938 'upload_date': '20161111',
939 'uploader': 'Team PGP',
940 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
941 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
942 },
943 }],
944 'params': {
945 'skip_download': True,
946 },
cbaed4bb 947 },
f9f49d87 948 {
067aa17e 949 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
950 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
951 'info_dict': {
952 'id': 'gVfLd0zydlo',
953 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
954 },
955 'playlist_count': 2,
be49068d 956 'skip': 'Not multifeed anymore',
f9f49d87 957 },
cbaed4bb 958 {
2d3d2997 959 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 960 'only_matching': True,
0e49d9a6 961 },
6d4fc66b 962 {
2d3d2997 963 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
964 'only_matching': True,
965 },
0e49d9a6 966 {
067aa17e 967 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 968 # Also tests cut-off URL expansion in video description (see
067aa17e
S
969 # https://github.com/ytdl-org/youtube-dl/issues/1892,
970 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
971 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
972 'info_dict': {
973 'id': 'lsguqyKfVQg',
974 'ext': 'mp4',
975 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 976 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 977 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 978 'duration': 133,
0e49d9a6
LL
979 'upload_date': '20151119',
980 'uploader_id': 'IronSoulElf',
ec85ded8 981 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 982 'uploader': 'IronSoulElf',
eb6793ba
S
983 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
984 'track': 'Dark Walk - Position Music',
985 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 986 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
987 },
988 'params': {
989 'skip_download': True,
990 },
991 },
61f92af1 992 {
067aa17e 993 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
994 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
995 'only_matching': True,
996 },
313dfc45
LL
997 {
998 # Video with yt:stretch=17:0
999 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1000 'info_dict': {
1001 'id': 'Q39EVAstoRM',
1002 'ext': 'mp4',
1003 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1004 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1005 'upload_date': '20151107',
1006 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1007 'uploader': 'CH GAMER DROID',
1008 },
1009 'params': {
1010 'skip_download': True,
1011 },
be49068d 1012 'skip': 'This video does not exist.',
313dfc45 1013 },
201c1459 1014 {
1015 # Video with incomplete 'yt:stretch=16:'
1016 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1017 'only_matching': True,
1018 },
7caf9830
S
1019 {
1020 # Video licensed under Creative Commons
1021 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1022 'info_dict': {
1023 'id': 'M4gD1WSo5mA',
1024 'ext': 'mp4',
1025 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1026 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1027 'duration': 721,
7caf9830
S
1028 'upload_date': '20150127',
1029 'uploader_id': 'BerkmanCenter',
ec85ded8 1030 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1031 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1032 'license': 'Creative Commons Attribution license (reuse allowed)',
1033 },
1034 'params': {
1035 'skip_download': True,
1036 },
1037 },
fd050249
S
1038 {
1039 # Channel-like uploader_url
1040 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1041 'info_dict': {
1042 'id': 'eQcmzGIKrzg',
1043 'ext': 'mp4',
1044 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1045 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1046 'duration': 4060,
fd050249 1047 'upload_date': '20151119',
eb6793ba 1048 'uploader': 'Bernie Sanders',
fd050249 1049 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1050 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1051 'license': 'Creative Commons Attribution license (reuse allowed)',
1052 },
1053 'params': {
1054 'skip_download': True,
1055 },
1056 },
040ac686
S
1057 {
1058 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1059 'only_matching': True,
7f29cf54
S
1060 },
1061 {
067aa17e 1062 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1063 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1064 'only_matching': True,
6496ccb4
S
1065 },
1066 {
1067 # Rental video preview
1068 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1069 'info_dict': {
1070 'id': 'uGpuVWrhIzE',
1071 'ext': 'mp4',
1072 'title': 'Piku - Trailer',
1073 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1074 'upload_date': '20150811',
1075 'uploader': 'FlixMatrix',
1076 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1077 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1078 'license': 'Standard YouTube License',
1079 },
1080 'params': {
1081 'skip_download': True,
1082 },
eb6793ba 1083 'skip': 'This video is not available.',
022a5d66 1084 },
12afdc2a
S
1085 {
1086 # YouTube Red video with episode data
1087 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1088 'info_dict': {
1089 'id': 'iqKdEhx-dD4',
1090 'ext': 'mp4',
1091 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1092 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1093 'duration': 2085,
12afdc2a
S
1094 'upload_date': '20170118',
1095 'uploader': 'Vsauce',
1096 'uploader_id': 'Vsauce',
1097 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1098 'series': 'Mind Field',
1099 'season_number': 1,
1100 'episode_number': 1,
1101 },
1102 'params': {
1103 'skip_download': True,
1104 },
1105 'expected_warnings': [
1106 'Skipping DASH manifest',
1107 ],
1108 },
c7121fa7
S
1109 {
1110 # The following content has been identified by the YouTube community
1111 # as inappropriate or offensive to some audiences.
1112 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1113 'info_dict': {
1114 'id': '6SJNVb0GnPI',
1115 'ext': 'mp4',
1116 'title': 'Race Differences in Intelligence',
1117 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1118 'duration': 965,
1119 'upload_date': '20140124',
1120 'uploader': 'New Century Foundation',
1121 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1122 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1123 },
1124 'params': {
1125 'skip_download': True,
1126 },
545cc85d 1127 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1128 },
022a5d66
S
1129 {
1130 # itag 212
1131 'url': '1t24XAntNCY',
1132 'only_matching': True,
fd5c4aab
S
1133 },
1134 {
1135 # geo restricted to JP
1136 'url': 'sJL6WA-aGkQ',
1137 'only_matching': True,
1138 },
cd5a74a2
S
1139 {
1140 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1141 'only_matching': True,
1142 },
bc2ca1bb 1143 {
1144 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1145 'only_matching': True,
1146 },
1147 {
1148 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1149 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1150 'only_matching': True,
1151 },
825cd268
RA
1152 {
1153 # DRM protected
1154 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1155 'only_matching': True,
4fe54c12
S
1156 },
1157 {
1158 # Video with unsupported adaptive stream type formats
1159 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1160 'info_dict': {
1161 'id': 'Z4Vy8R84T1U',
1162 'ext': 'mp4',
1163 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1164 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1165 'duration': 433,
1166 'upload_date': '20130923',
1167 'uploader': 'Amelia Putri Harwita',
1168 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1169 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1170 'formats': 'maxcount:10',
1171 },
1172 'params': {
1173 'skip_download': True,
1174 'youtube_include_dash_manifest': False,
1175 },
5429d6a9 1176 'skip': 'not actual anymore',
5caabd3c 1177 },
1178 {
822b9d9c 1179 # Youtube Music Auto-generated description
5caabd3c 1180 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1181 'info_dict': {
1182 'id': 'MgNrAu2pzNs',
1183 'ext': 'mp4',
1184 'title': 'Voyeur Girl',
1185 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1186 'upload_date': '20190312',
5429d6a9
S
1187 'uploader': 'Stephen - Topic',
1188 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1189 'artist': 'Stephen',
1190 'track': 'Voyeur Girl',
1191 'album': 'it\'s too much love to know my dear',
1192 'release_date': '20190313',
1193 'release_year': 2019,
1194 },
1195 'params': {
1196 'skip_download': True,
1197 },
1198 },
66b48727
RA
1199 {
1200 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1201 'only_matching': True,
1202 },
011e75e6
S
1203 {
1204 # invalid -> valid video id redirection
1205 'url': 'DJztXj2GPfl',
1206 'info_dict': {
1207 'id': 'DJztXj2GPfk',
1208 'ext': 'mp4',
1209 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1210 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1211 'upload_date': '20090125',
1212 'uploader': 'Prochorowka',
1213 'uploader_id': 'Prochorowka',
1214 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1215 'artist': 'Panjabi MC',
1216 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1217 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1218 },
1219 'params': {
1220 'skip_download': True,
1221 },
545cc85d 1222 'skip': 'Video unavailable',
ea74e00b
DP
1223 },
1224 {
1225 # empty description results in an empty string
1226 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1227 'info_dict': {
1228 'id': 'x41yOUIvK2k',
1229 'ext': 'mp4',
1230 'title': 'IMG 3456',
1231 'description': '',
1232 'upload_date': '20170613',
1233 'uploader_id': 'ElevageOrVert',
1234 'uploader': 'ElevageOrVert',
1235 },
1236 'params': {
1237 'skip_download': True,
1238 },
1239 },
a0566bbf 1240 {
29f7c58a 1241 # with '};' inside yt initial data (see [1])
1242 # see [2] for an example with '};' inside ytInitialPlayerResponse
1243 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1244 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1245 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1246 'info_dict': {
1247 'id': 'CHqg6qOn4no',
1248 'ext': 'mp4',
1249 'title': 'Part 77 Sort a list of simple types in c#',
1250 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1251 'upload_date': '20130831',
1252 'uploader_id': 'kudvenkat',
1253 'uploader': 'kudvenkat',
1254 },
1255 'params': {
1256 'skip_download': True,
1257 },
1258 },
29f7c58a 1259 {
1260 # another example of '};' in ytInitialData
1261 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1262 'only_matching': True,
1263 },
1264 {
1265 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1266 'only_matching': True,
1267 },
545cc85d 1268 {
cc2db878 1269 # https://github.com/ytdl-org/youtube-dl/pull/28094
1270 'url': 'OtqTfy26tG0',
1271 'info_dict': {
1272 'id': 'OtqTfy26tG0',
1273 'ext': 'mp4',
1274 'title': 'Burn Out',
1275 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1276 'upload_date': '20141120',
1277 'uploader': 'The Cinematic Orchestra - Topic',
1278 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1279 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1280 'artist': 'The Cinematic Orchestra',
1281 'track': 'Burn Out',
1282 'album': 'Every Day',
1283 'release_data': None,
1284 'release_year': None,
1285 },
1286 'params': {
1287 'skip_download': True,
1288 },
545cc85d 1289 },
bc2ca1bb 1290 {
1291 # controversial video, only works with bpctr when authenticated with cookies
1292 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1293 'only_matching': True,
1294 },
f7ad7160 1295 {
1296 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1297 'url': 'cBvYw8_A0vQ',
1298 'info_dict': {
1299 'id': 'cBvYw8_A0vQ',
1300 'ext': 'mp4',
1301 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1302 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1303 'upload_date': '20201120',
1304 'uploader': 'Walk around Japan',
1305 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1306 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1307 },
1308 'params': {
1309 'skip_download': True,
1310 },
1311 },
2eb88d95
PH
1312 ]
1313
@classmethod
def suitable(cls, url):
    """
    Return False when the URL query string carries a non-empty 'list'
    parameter; otherwise defer to the inherited suitability check.
    """
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1320
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the base extractor and create the empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, keyed by (player URL, signature layout id)
    self._player_cache = {}
    # Downloaded player code, keyed by player id
    self._code_cache = {}
e0df6211 1325
60064c53
PH
def _signature_cache_id(self, example_sig):
    """
    Return a string describing the layout of a signature: the lengths of
    its dot-separated parts, joined by dots (e.g. 'ab.cde' gives '2.3').
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1329
e40c758c
S
1330 @classmethod
1331 def _extract_player_info(cls, player_url):
1332 for player_re in cls._PLAYER_INFO_RE:
1333 id_m = re.search(player_re, player_url)
1334 if id_m:
1335 break
1336 else:
c081b35c 1337 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1338 return id_m.group('id')
e40c758c
S
1339
def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a callable that transforms a signature laid out like example_sig.

    The downloader cache is consulted first. On a miss the player code is
    downloaded (once per player id), the signature function is parsed out
    of it, and the resulting index list is stored in the cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    sig_layout = self._signature_cache_id(example_sig)
    func_id = 'js_%s_%s' % (player_id, sig_layout)
    assert os.path.basename(func_id) == func_id

    cached_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        # A cached spec is a list of source indices, one per output character
        return lambda s: ''.join(s[i] for i in cached_spec)

    if player_id not in self._code_cache:
        self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Feed the function a probe string whose i-th character has code point i;
    # the code points of the result are then the source index of each output
    # character, which is what gets cached.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
1366
def _print_sig_code(self, func, example_sig):
    """
    Print Python source equivalent to the signature function func.

    func is run on a probe string as long as example_sig whose i-th
    character has code point i; the resulting index permutation is
    rendered as a sum of indexing/slicing expressions on `s`, guarded by a
    check on the lengths of the dot-separated parts of the signature.
    The code is written with self.to_screen; nothing is returned.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # The pairwise scan below never binds `i` for fewer than two
        # indices, so emit such short inputs directly.
        if len(idxs) < 2:
            for idx in idxs:
                yield 's[%d]' % idx
            return

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Still inside the current +1/-1 run
                    continue
                # Run ended at prev; i is handled as prev of the next pair
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    part_lengths = [compat_str(len(p)) for p in example_sig.split('.')]
    # A one-element tuple needs a trailing comma; without it the printed
    # guard would compare a tuple against a plain int and never be true.
    signature_id_tuple = '(%s%s)' % (
        ', '.join(part_lengths), ',' if len(part_lengths) == 1 else '')
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1405
e0df6211
PH
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player code and return a Python
    callable for it.

    The function name is searched for with the patterns below, tried in
    order; the named function is then extracted with JSInterpreter. The
    returned callable takes the signature string and passes it to the
    extracted function as its single argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # This pattern used to be listed twice in a row; the repeat could
         # never match where the first copy had not, so it was dropped.
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])
1429
def _decrypt_signature(self, s, video_id, player_url):
    """
    Turn the encrypted s field into a working signature

    Raises ExtractorError when player_url is None, or when anything goes
    wrong while obtaining or applying the signature function (the
    formatted traceback is included in the message).
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Make the player URL absolute
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        # One signature function per (player URL, signature layout)
        cache_key = (player_url, self._signature_cache_id(s))
        sig_func = self._player_cache.get(cache_key)
        if sig_func is None:
            sig_func = self._extract_signature_function(
                video_id, player_url, s)
            self._player_cache[cache_key] = sig_func
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1456
def _mark_watched(self, video_id, player_response):
    """
    Request the playback tracking URL found in player_response, with 'ver'
    and a freshly generated 'cpn' set in its query string.

    Returns without doing anything when player_response holds no valid
    playback URL. The request is non-fatal.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]

    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(
        url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1481
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """
    Return a list of YouTube references found in webpage.

    Three sources are collected, in this order: URLs of embedded players
    (HTML-unescaped), 'data-youtube-id' values of lazyYT elements
    (HTML-unescaped), and 'data-video_id' values of elements with the
    'yvii_single_video_player' class.
    """
    # Embedded YouTube player
    # `embedSWF\(\s*` covers the call form embedSWF("...").  The older
    # alternative `embedSWF\(?:\s*` only matches when a literal colon
    # follows; it is kept so nothing that matched before stops matching.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(video_id)
        for video_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a (q1, q2, video id) tuple; only the id is wanted
    entries.extend(m[-1] for m in matches)

    return entries
1513
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by YoutubeIE._extract_urls, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1518
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. the second group of _VALID_URL matched against url.

    Raises ExtractorError when url does not match _VALID_URL.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1526
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build the chapter list from the chaptered player bar found in data.

    Every chapter ends where the following one starts; the last chapter
    ends at duration.  Chapters whose start or end time cannot be
    determined are left out.

    Returns a list of dicts with 'start_time', 'end_time' (seconds) and
    'title', or None when data holds no chapter list.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']['decoratedPlayerBarRenderer']
                   ['playerBar']['chapteredPlayerBarRenderer']['chapters'],
        list)
    if not raw_chapters:
        return

    def start_seconds(item):
        # timeRangeStartMillis is in milliseconds, hence scale=1000
        millis = try_get(
            item, lambda x: x['chapterRenderer']['timeRangeStartMillis'], int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for index, item in enumerate(raw_chapters):
        begin = start_seconds(item)
        if begin is None:
            continue
        if index + 1 < total:
            finish = start_seconds(raw_chapters[index + 1])
        else:
            finish = duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                item, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1566
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search webpage for a JSON blob matching regex and return it parsed.

    The regex followed by _YT_INITIAL_BOUNDARY_RE is tried before the bare
    regex; '{}' is parsed when neither matches.  Parsing is non-fatal.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex,
    )
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1571
d92f5d5a 1572 @staticmethod
1573 def parse_time_text(time_text):
1574 """
1575 Parse the comment time text
1576 time_text is in the format 'X units ago (edited)'
1577 """
1578 time_text_split = time_text.split(' ')
1579 if len(time_text_split) >= 3:
1580 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1581
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the non-empty 'text' strings of the dicts in runs.

    Entries that are not dicts are ignored.  Returns None when no text
    was found.
    """
    pieces = []
    for run in runs:
        if isinstance(run, dict):
            piece = try_get(run, lambda x: x['text'], compat_str)
            if piece:
                pieces.append(piece)
    return ''.join(pieces) if pieces else None
1595
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer into a comment dict.

    comment_renderer -- dict describing one comment
    parent -- id of the comment this one replies to; None for top-level
        comments, which are reported with parent 'root'

    Returns None when the renderer has no 'commentId'.  'timestamp' is
    None when the published time text is missing or cannot be parsed.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)
    # The timestamp is derived from the relative time text.  Both the text
    # and its parsed form may be None, so guard before calling timetuple()
    timestamp = None
    time_text_dt = self.parse_time_text(time_text) if time_text else None
    if time_text_dt is not None:
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                  lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_liked,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
1628
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generate the comments reachable from a continuation, page by page.

    Yields comment dicts built by _extract_comment.  For top-level
    comments (parent is None) it may additionally yield one int, the
    estimated total number of comments; consumers tell the two apart by
    type.  Replies are fetched by recursing with parent set to the id of
    the comment they belong to.

    root_continuation_data -- renderer the first continuation is taken from
    identity_token, account_syncid, ytcfg -- passed to _generate_api_headers
    session_token_list -- one-element list holding the XSRF token; updated
        in place whenever a response carries a new 'xsrf_token'
    parent -- id of the parent comment, None for top-level comments
    comment_counts -- shared mutable counters:
        [comments so far, estimated total, current reply thread number]

    Raises ExtractorError when a page still fails after 'extractor_retries'
    attempts or when the response reports an error.
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # dict is the expected type here; it was formerly passed inside
            # the getter tuple and hence called as a second getter
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None

        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                # Any other error, or no retries left
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    estimated_total = str_to_int(expected_comment_count)
                    # Skip count texts that do not convert to a number;
                    # formatting them with %d would raise TypeError
                    if estimated_total is not None:
                        comment_counts[1] = estimated_total
                        self.to_screen('Downloading ~%d comments' % estimated_total)
                        yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
        else:
            # No known renderer in the response: the continuation would
            # stay unchanged and the same page be requested forever
            break
1813
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
    """Entry for comment extraction"""
    supported_renderers = (
        'itemSectionRenderer',
    )
    collected = []
    total_estimate = 0
    for entry in contents:
        for renderer_name, renderer in entry.items():
            if renderer_name in supported_renderers:
                # _comment_entries yields comment dicts and, as an int,
                # the estimated total number of comments
                for item in self._comment_entries(
                        renderer,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg),
                        ytcfg=ytcfg,
                        session_token_list=[xsrf_token]):
                    if isinstance(item, int):
                        total_estimate = item
                    else:
                        collected.append(item)
                # Only the first supported renderer of an entry is used
                break
    self.to_screen('Downloaded %d/%d comments' % (len(collected), total_estimate))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
1844
c5e8d7af 1845 def _real_extract(self, url):
cf7e015f 1846 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1847 video_id = self._match_id(url)
1848 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1849 webpage_url = base_url + 'watch?v=' + video_id
1850 webpage = self._download_webpage(
cce889b9 1851 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1852
1853 player_response = None
1854 if webpage:
1855 player_response = self._extract_yt_initial_variable(
1856 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1857 video_id, 'initial player response')
f4f751af 1858
1859 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1860 if not player_response:
1861 player_response = self._call_api(
f4f751af 1862 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1863
1864 playability_status = player_response.get('playabilityStatus') or {}
1865 if playability_status.get('reason') == 'Sign in to confirm your age':
1866 pr = self._parse_json(try_get(compat_parse_qs(
1867 self._download_webpage(
1868 base_url + 'get_video_info', video_id,
1869 'Refetching age-gated info webpage',
1870 'unable to download video info webpage', query={
1871 'video_id': video_id,
7c60c33e 1872 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1873 }, fatal=False)),
1874 lambda x: x['player_response'][0],
1875 compat_str) or '{}', video_id)
1876 if pr:
1877 player_response = pr
1878
1879 trailer_video_id = try_get(
1880 playability_status,
1881 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1882 compat_str)
1883 if trailer_video_id:
1884 return self.url_result(
1885 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1886
545cc85d 1887 def get_text(x):
1888 if not x:
c2d125d9 1889 return
f7ad7160 1890 text = x.get('simpleText')
1891 if text and isinstance(text, compat_str):
1892 return text
1893 runs = x.get('runs')
1894 if not isinstance(runs, list):
1895 return
1896 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
15be3eb5 1897
545cc85d 1898 search_meta = (
1899 lambda x: self._html_search_meta(x, webpage, default=None)) \
1900 if webpage else lambda x: None
dbdaaa23 1901
545cc85d 1902 video_details = player_response.get('videoDetails') or {}
37357d21 1903 microformat = try_get(
545cc85d 1904 player_response,
1905 lambda x: x['microformat']['playerMicroformatRenderer'],
1906 dict) or {}
1907 video_title = video_details.get('title') \
1908 or get_text(microformat.get('title')) \
1909 or search_meta(['og:title', 'twitter:title', 'title'])
1910 video_description = video_details.get('shortDescription')
cf7e015f 1911
8fe10494 1912 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1913 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1914 multifeed_metadata_list = try_get(
1915 player_response,
1916 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1917 compat_str)
8fe10494
S
1918 if multifeed_metadata_list:
1919 entries = []
1920 feed_ids = []
1921 for feed in multifeed_metadata_list.split(','):
1922 # Unquote should take place before split on comma (,) since textual
1923 # fields may contain comma as well (see
067aa17e 1924 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1925 feed_data = compat_parse_qs(
1926 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1927
1928 def feed_entry(name):
545cc85d 1929 return try_get(
1930 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1931
1932 feed_id = feed_entry('id')
1933 if not feed_id:
1934 continue
1935 feed_title = feed_entry('title')
1936 title = video_title
1937 if feed_title:
1938 title += ' (%s)' % feed_title
8fe10494
S
1939 entries.append({
1940 '_type': 'url_transparent',
1941 'ie_key': 'Youtube',
1942 'url': smuggle_url(
545cc85d 1943 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1944 {'force_singlefeed': True}),
6b09401b 1945 'title': title,
8fe10494 1946 })
6b09401b 1947 feed_ids.append(feed_id)
8fe10494
S
1948 self.to_screen(
1949 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1950 % (', '.join(feed_ids), video_id))
545cc85d 1951 return self.playlist_result(
1952 entries, video_id, video_title, video_description)
8fe10494
S
1953 else:
1954 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1955
545cc85d 1956 formats = []
1957 itags = []
cc2db878 1958 itag_qualities = {}
545cc85d 1959 player_url = None
dca3ff4a 1960 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
545cc85d 1961 streaming_data = player_response.get('streamingData') or {}
1962 streaming_formats = streaming_data.get('formats') or []
1963 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1964 for fmt in streaming_formats:
1965 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1966 continue
321bf820 1967
cc2db878 1968 itag = str_or_none(fmt.get('itag'))
1969 quality = fmt.get('quality')
1970 if itag and quality:
1971 itag_qualities[itag] = quality
1972 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1973 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1974 # number of fragment that would subsequently requested with (`&sq=N`)
1975 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1976 continue
1977
545cc85d 1978 fmt_url = fmt.get('url')
1979 if not fmt_url:
1980 sc = compat_parse_qs(fmt.get('signatureCipher'))
1981 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1982 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1983 if not (sc and fmt_url and encrypted_sig):
1984 continue
1985 if not player_url:
1986 if not webpage:
1987 continue
1988 player_url = self._search_regex(
1989 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1990 webpage, 'player URL', fatal=False)
1991 if not player_url:
201e9eaa 1992 continue
545cc85d 1993 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
1994 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
1995 fmt_url += '&' + sp + '=' + signature
1996
545cc85d 1997 if itag:
1998 itags.append(itag)
cc2db878 1999 tbr = float_or_none(
2000 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2001 dct = {
2002 'asr': int_or_none(fmt.get('audioSampleRate')),
2003 'filesize': int_or_none(fmt.get('contentLength')),
2004 'format_id': itag,
2005 'format_note': fmt.get('qualityLabel') or quality,
2006 'fps': int_or_none(fmt.get('fps')),
2007 'height': int_or_none(fmt.get('height')),
dca3ff4a 2008 'quality': q(quality),
cc2db878 2009 'tbr': tbr,
545cc85d 2010 'url': fmt_url,
2011 'width': fmt.get('width'),
2012 }
2013 mimetype = fmt.get('mimeType')
2014 if mimetype:
2015 mobj = re.match(
2016 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2017 if mobj:
2018 dct['ext'] = mimetype2ext(mobj.group(1))
2019 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2020 no_audio = dct.get('acodec') == 'none'
2021 no_video = dct.get('vcodec') == 'none'
2022 if no_audio:
2023 dct['vbr'] = tbr
2024 if no_video:
2025 dct['abr'] = tbr
2026 if no_audio or no_video:
545cc85d 2027 dct['downloader_options'] = {
2028 # Youtube throttles chunks >~10M
2029 'http_chunk_size': 10485760,
bf1317d2 2030 }
7c60c33e 2031 if dct.get('ext'):
2032 dct['container'] = dct['ext'] + '_dash'
545cc85d 2033 formats.append(dct)
2034
2035 hls_manifest_url = streaming_data.get('hlsManifestUrl')
2036 if hls_manifest_url:
2037 for f in self._extract_m3u8_formats(
2038 hls_manifest_url, video_id, 'mp4', fatal=False):
2039 itag = self._search_regex(
2040 r'/itag/(\d+)', f['url'], 'itag', default=None)
2041 if itag:
2042 f['format_id'] = itag
2043 formats.append(f)
2044
1418a043 2045 if self._downloader.params.get('youtube_include_dash_manifest', True):
545cc85d 2046 dash_manifest_url = streaming_data.get('dashManifestUrl')
2047 if dash_manifest_url:
545cc85d 2048 for f in self._extract_mpd_formats(
2049 dash_manifest_url, video_id, fatal=False):
cc2db878 2050 itag = f['format_id']
2051 if itag in itags:
2052 continue
dca3ff4a 2053 if itag in itag_qualities:
2054 # Not actually usefull since the sorting is already done with "quality,res,fps,codec"
2055 # but kept to maintain feature parity (and code similarity) with youtube-dl
2056 # Remove if this causes any issues with sorting in future
2057 f['quality'] = q(itag_qualities[itag])
545cc85d 2058 filesize = int_or_none(self._search_regex(
2059 r'/clen/(\d+)', f.get('fragment_base_url')
2060 or f['url'], 'file size', default=None))
2061 if filesize:
2062 f['filesize'] = filesize
cc2db878 2063 formats.append(f)
bf1317d2 2064
545cc85d 2065 if not formats:
63ad4d43 2066 if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2067 self.raise_no_formats(
545cc85d 2068 'This video is DRM protected.', expected=True)
2069 pemr = try_get(
2070 playability_status,
2071 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2072 dict) or {}
2073 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2074 subreason = pemr.get('subreason')
2075 if subreason:
2076 subreason = clean_html(get_text(subreason))
2077 if subreason == 'The uploader has not made this video available in your country.':
2078 countries = microformat.get('availableCountries')
2079 if not countries:
2080 regions_allowed = search_meta('regionsAllowed')
2081 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2082 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2083 reason += '\n' + subreason
2084 if reason:
b7da73eb 2085 self.raise_no_formats(reason, expected=True)
bf1317d2 2086
545cc85d 2087 self._sort_formats(formats)
bf1317d2 2088
545cc85d 2089 keywords = video_details.get('keywords') or []
2090 if not keywords and webpage:
2091 keywords = [
2092 unescapeHTML(m.group('content'))
2093 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2094 for keyword in keywords:
2095 if keyword.startswith('yt:stretch='):
201c1459 2096 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2097 if mobj:
2098 # NB: float is intentional for forcing float division
2099 w, h = (float(v) for v in mobj.groups())
2100 if w > 0 and h > 0:
2101 ratio = w / h
2102 for f in formats:
2103 if f.get('vcodec') != 'none':
2104 f['stretched_ratio'] = ratio
2105 break
6449cd80 2106
545cc85d 2107 thumbnails = []
2108 for container in (video_details, microformat):
2109 for thumbnail in (try_get(
2110 container,
2111 lambda x: x['thumbnail']['thumbnails'], list) or []):
2112 thumbnail_url = thumbnail.get('url')
2113 if not thumbnail_url:
bf1317d2 2114 continue
1988fab7 2115 # Sometimes youtube gives a wrong thumbnail URL. See:
2116 # https://github.com/yt-dlp/yt-dlp/issues/233
2117 # https://github.com/ytdl-org/youtube-dl/issues/28023
2118 if 'maxresdefault' in thumbnail_url:
2119 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2120 thumbnails.append({
2121 'height': int_or_none(thumbnail.get('height')),
2122 'url': thumbnail_url,
2123 'width': int_or_none(thumbnail.get('width')),
2124 })
2125 if thumbnails:
2126 break
a6211d23 2127 else:
545cc85d 2128 thumbnail = search_meta(['og:image', 'twitter:image'])
2129 if thumbnail:
2130 thumbnails = [{'url': thumbnail}]
2131
2132 category = microformat.get('category') or search_meta('genre')
2133 channel_id = video_details.get('channelId') \
2134 or microformat.get('externalChannelId') \
2135 or search_meta('channelId')
2136 duration = int_or_none(
2137 video_details.get('lengthSeconds')
2138 or microformat.get('lengthSeconds')) \
2139 or parse_duration(search_meta('duration'))
2140 is_live = video_details.get('isLive')
2141 owner_profile_url = microformat.get('ownerProfileUrl')
2142
2143 info = {
2144 'id': video_id,
2145 'title': self._live_title(video_title) if is_live else video_title,
2146 'formats': formats,
2147 'thumbnails': thumbnails,
2148 'description': video_description,
2149 'upload_date': unified_strdate(
2150 microformat.get('uploadDate')
2151 or search_meta('uploadDate')),
2152 'uploader': video_details['author'],
2153 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2154 'uploader_url': owner_profile_url,
2155 'channel_id': channel_id,
2156 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2157 'duration': duration,
2158 'view_count': int_or_none(
2159 video_details.get('viewCount')
2160 or microformat.get('viewCount')
2161 or search_meta('interactionCount')),
2162 'average_rating': float_or_none(video_details.get('averageRating')),
2163 'age_limit': 18 if (
2164 microformat.get('isFamilySafe') is False
2165 or search_meta('isFamilyFriendly') == 'false'
2166 or search_meta('og:restrictions:age') == '18+') else 0,
2167 'webpage_url': webpage_url,
2168 'categories': [category] if category else None,
2169 'tags': keywords,
2170 'is_live': is_live,
2171 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2172 'was_live': video_details.get('isLiveContent'),
545cc85d 2173 }
b477fc13 2174
545cc85d 2175 pctr = try_get(
2176 player_response,
2177 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2178 subtitles = {}
2179 if pctr:
2180 def process_language(container, base_url, lang_code, query):
2181 lang_subs = []
2182 for fmt in self._SUBTITLE_FORMATS:
2183 query.update({
2184 'fmt': fmt,
2185 })
2186 lang_subs.append({
2187 'ext': fmt,
2188 'url': update_url_query(base_url, query),
2189 })
2190 container[lang_code] = lang_subs
7e72694b 2191
545cc85d 2192 for caption_track in (pctr.get('captionTracks') or []):
2193 base_url = caption_track.get('baseUrl')
2194 if not base_url:
2195 continue
2196 if caption_track.get('kind') != 'asr':
2197 lang_code = caption_track.get('languageCode')
2198 if not lang_code:
2199 continue
2200 process_language(
2201 subtitles, base_url, lang_code, {})
2202 continue
2203 automatic_captions = {}
2204 for translation_language in (pctr.get('translationLanguages') or []):
2205 translation_language_code = translation_language.get('languageCode')
2206 if not translation_language_code:
2207 continue
2208 process_language(
2209 automatic_captions, base_url, translation_language_code,
2210 {'tlang': translation_language_code})
2211 info['automatic_captions'] = automatic_captions
2212 info['subtitles'] = subtitles
7e72694b 2213
545cc85d 2214 parsed_url = compat_urllib_parse_urlparse(url)
2215 for component in [parsed_url.fragment, parsed_url.query]:
2216 query = compat_parse_qs(component)
2217 for k, v in query.items():
2218 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2219 d_k += '_time'
2220 if d_k not in info and k in s_ks:
2221 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2222
2223 # Youtube Music Auto-generated description
822b9d9c 2224 if video_description:
38d70284 2225 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2226 if mobj:
822b9d9c
RA
2227 release_year = mobj.group('release_year')
2228 release_date = mobj.group('release_date')
2229 if release_date:
2230 release_date = release_date.replace('-', '')
2231 if not release_year:
545cc85d 2232 release_year = release_date[:4]
2233 info.update({
2234 'album': mobj.group('album'.strip()),
2235 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2236 'track': mobj.group('track').strip(),
2237 'release_date': release_date,
cc2db878 2238 'release_year': int_or_none(release_year),
545cc85d 2239 })
7e72694b 2240
545cc85d 2241 initial_data = None
2242 if webpage:
2243 initial_data = self._extract_yt_initial_variable(
2244 webpage, self._YT_INITIAL_DATA_RE, video_id,
2245 'yt initial data')
2246 if not initial_data:
2247 initial_data = self._call_api(
f4f751af 2248 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2249
2250 if not is_live:
2251 try:
2252 # This will error if there is no livechat
2253 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2254 info['subtitles']['live_chat'] = [{
394dcd44 2255 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2256 'video_id': video_id,
2257 'ext': 'json',
2258 'protocol': 'youtube_live_chat_replay',
2259 }]
2260 except (KeyError, IndexError, TypeError):
2261 pass
2262
2263 if initial_data:
2264 chapters = self._extract_chapters_from_json(
2265 initial_data, video_id, duration)
2266 if not chapters:
2267 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2268 contents = try_get(
2269 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2270 list)
2271 if not contents:
2272 continue
2273
2274 def chapter_time(mmlir):
2275 return parse_duration(
2276 get_text(mmlir.get('timeDescription')))
2277
2278 chapters = []
2279 for next_num, content in enumerate(contents, start=1):
2280 mmlir = content.get('macroMarkersListItemRenderer') or {}
2281 start_time = chapter_time(mmlir)
2282 end_time = chapter_time(try_get(
2283 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2284 if next_num < len(contents) else duration
2285 if start_time is None or end_time is None:
2286 continue
2287 chapters.append({
2288 'start_time': start_time,
2289 'end_time': end_time,
2290 'title': get_text(mmlir.get('title')),
2291 })
2292 if chapters:
2293 break
2294 if chapters:
2295 info['chapters'] = chapters
2296
2297 contents = try_get(
2298 initial_data,
2299 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2300 list) or []
2301 for content in contents:
2302 vpir = content.get('videoPrimaryInfoRenderer')
2303 if vpir:
2304 stl = vpir.get('superTitleLink')
2305 if stl:
2306 stl = get_text(stl)
2307 if try_get(
2308 vpir,
2309 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2310 info['location'] = stl
2311 else:
2312 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2313 if mobj:
2314 info.update({
2315 'series': mobj.group(1),
2316 'season_number': int(mobj.group(2)),
2317 'episode_number': int(mobj.group(3)),
2318 })
2319 for tlb in (try_get(
2320 vpir,
2321 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2322 list) or []):
2323 tbr = tlb.get('toggleButtonRenderer') or {}
2324 for getter, regex in [(
2325 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2326 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2327 lambda x: x['accessibility'],
2328 lambda x: x['accessibilityData']['accessibilityData'],
2329 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2330 label = (try_get(tbr, getter, dict) or {}).get('label')
2331 if label:
2332 mobj = re.match(regex, label)
2333 if mobj:
2334 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2335 break
2336 sbr_tooltip = try_get(
2337 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2338 if sbr_tooltip:
2339 like_count, dislike_count = sbr_tooltip.split(' / ')
2340 info.update({
2341 'like_count': str_to_int(like_count),
2342 'dislike_count': str_to_int(dislike_count),
2343 })
2344 vsir = content.get('videoSecondaryInfoRenderer')
2345 if vsir:
2346 info['channel'] = get_text(try_get(
2347 vsir,
2348 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2349 dict))
545cc85d 2350 rows = try_get(
2351 vsir,
2352 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2353 list) or []
2354 multiple_songs = False
2355 for row in rows:
2356 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2357 multiple_songs = True
2358 break
2359 for row in rows:
2360 mrr = row.get('metadataRowRenderer') or {}
2361 mrr_title = mrr.get('title')
2362 if not mrr_title:
2363 continue
2364 mrr_title = get_text(mrr['title'])
2365 mrr_contents_text = get_text(mrr['contents'][0])
2366 if mrr_title == 'License':
2367 info['license'] = mrr_contents_text
2368 elif not multiple_songs:
2369 if mrr_title == 'Album':
2370 info['album'] = mrr_contents_text
2371 elif mrr_title == 'Artist':
2372 info['artist'] = mrr_contents_text
2373 elif mrr_title == 'Song':
2374 info['track'] = mrr_contents_text
2375
2376 fallbacks = {
2377 'channel': 'uploader',
2378 'channel_id': 'uploader_id',
2379 'channel_url': 'uploader_url',
2380 }
2381 for to, frm in fallbacks.items():
2382 if not info.get(to):
2383 info[to] = info.get(frm)
2384
2385 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2386 v = info.get(s_k)
2387 if v:
2388 info[d_k] = v
b84071c0 2389
c224251a
M
2390 is_private = bool_or_none(video_details.get('isPrivate'))
2391 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2392 is_membersonly = None
b28f8d24 2393 is_premium = None
c224251a
M
2394 if initial_data and is_private is not None:
2395 is_membersonly = False
b28f8d24 2396 is_premium = False
c224251a
M
2397 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2398 for content in contents or []:
2399 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2400 for badge in badges or []:
2401 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2402 if label.lower() == 'members only':
2403 is_membersonly = True
2404 break
b28f8d24
M
2405 elif label.lower() == 'premium':
2406 is_premium = True
2407 break
2408 if is_membersonly or is_premium:
c224251a
M
2409 break
2410
2411 # TODO: Add this for playlists
2412 info['availability'] = self._availability(
2413 is_private=is_private,
b28f8d24 2414 needs_premium=is_premium,
c224251a
M
2415 needs_subscription=is_membersonly,
2416 needs_auth=info['age_limit'] >= 18,
2417 is_unlisted=None if is_private is None else is_unlisted)
2418
06167fbb 2419 # get xsrf for annotations or comments
2420 get_annotations = self._downloader.params.get('writeannotations', False)
2421 get_comments = self._downloader.params.get('getcomments', False)
2422 if get_annotations or get_comments:
29f7c58a 2423 xsrf_token = None
545cc85d 2424 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2425 if ytcfg:
2426 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2427 if not xsrf_token:
2428 xsrf_token = self._search_regex(
2429 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2430 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2431
2432 # annotations
06167fbb 2433 if get_annotations:
64b6a4e9
RA
2434 invideo_url = try_get(
2435 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2436 if xsrf_token and invideo_url:
29f7c58a 2437 xsrf_field_name = None
2438 if ytcfg:
2439 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2440 if not xsrf_field_name:
2441 xsrf_field_name = self._search_regex(
2442 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2443 webpage, 'xsrf field name',
29f7c58a 2444 group='xsrf_field_name', default='session_token')
8a784c74 2445 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2446 self._proto_relative_url(invideo_url),
2447 video_id, note='Downloading annotations',
2448 errnote='Unable to download video annotations', fatal=False,
2449 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2450
277d6ff5 2451 if get_comments:
a1c5d2ca 2452 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2453
545cc85d 2454 self.mark_watched(video_id, player_response)
d77ab8e2 2455
545cc85d 2456 return info
c5e8d7af 2457
5f6a1245 2458
8bdd16b4 2459class YoutubeTabIE(YoutubeBaseInfoExtractor):
2460 IE_DESC = 'YouTube.com tab'
70d5c17b 2461 _VALID_URL = r'''(?x)
2462 https?://
2463 (?:\w+\.)?
2464 (?:
2465 youtube(?:kids)?\.com|
2466 invidio\.us
2467 )/
2468 (?:
2469 (?:channel|c|user)/|
2470 (?P<not_channel>
9ba5705a 2471 feed/|hashtag/|
70d5c17b 2472 (?:playlist|watch)\?.*?\blist=
2473 )|
29f7c58a 2474 (?!(?:%s)\b) # Direct URLs
70d5c17b 2475 )
2476 (?P<id>[^/?\#&]+)
2477 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2478 IE_NAME = 'youtube:tab'
2479
81127aa5 2480 _TESTS = [{
8bdd16b4 2481 # playlists, multipage
2482 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2483 'playlist_mincount': 94,
2484 'info_dict': {
2485 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2486 'title': 'Игорь Клейнер - Playlists',
2487 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2488 'uploader': 'Игорь Клейнер',
2489 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2490 },
2491 }, {
2492 # playlists, multipage, different order
2493 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2494 'playlist_mincount': 94,
2495 'info_dict': {
2496 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2497 'title': 'Игорь Клейнер - Playlists',
2498 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2499 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2500 'uploader': 'Игорь Клейнер',
8bdd16b4 2501 },
201c1459 2502 }, {
2503 # playlists, series
2504 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2505 'playlist_mincount': 5,
2506 'info_dict': {
2507 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2508 'title': '3Blue1Brown - Playlists',
2509 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2510 },
8bdd16b4 2511 }, {
2512 # playlists, singlepage
2513 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2514 'playlist_mincount': 4,
2515 'info_dict': {
2516 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2517 'title': 'ThirstForScience - Playlists',
2518 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2519 'uploader': 'ThirstForScience',
2520 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2521 }
2522 }, {
2523 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2524 'only_matching': True,
2525 }, {
2526 # basic, single video playlist
0e30a7b9 2527 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2528 'info_dict': {
0e30a7b9 2529 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2530 'uploader': 'Sergey M.',
2531 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2532 'title': 'youtube-dl public playlist',
81127aa5 2533 },
0e30a7b9 2534 'playlist_count': 1,
9291475f 2535 }, {
8bdd16b4 2536 # empty playlist
0e30a7b9 2537 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2538 'info_dict': {
0e30a7b9 2539 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2540 'uploader': 'Sergey M.',
2541 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2542 'title': 'youtube-dl empty playlist',
9291475f
PH
2543 },
2544 'playlist_count': 0,
2545 }, {
8bdd16b4 2546 # Home tab
2547 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2548 'info_dict': {
8bdd16b4 2549 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2550 'title': 'lex will - Home',
2551 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2552 'uploader': 'lex will',
2553 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2554 },
8bdd16b4 2555 'playlist_mincount': 2,
9291475f 2556 }, {
8bdd16b4 2557 # Videos tab
2558 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2559 'info_dict': {
8bdd16b4 2560 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2561 'title': 'lex will - Videos',
2562 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2563 'uploader': 'lex will',
2564 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2565 },
8bdd16b4 2566 'playlist_mincount': 975,
9291475f 2567 }, {
8bdd16b4 2568 # Videos tab, sorted by popular
2569 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2570 'info_dict': {
8bdd16b4 2571 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2572 'title': 'lex will - Videos',
2573 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2574 'uploader': 'lex will',
2575 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2576 },
8bdd16b4 2577 'playlist_mincount': 199,
9291475f 2578 }, {
8bdd16b4 2579 # Playlists tab
2580 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2581 'info_dict': {
8bdd16b4 2582 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2583 'title': 'lex will - Playlists',
2584 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2585 'uploader': 'lex will',
2586 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2587 },
8bdd16b4 2588 'playlist_mincount': 17,
ac7553d0 2589 }, {
8bdd16b4 2590 # Community tab
2591 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2592 'info_dict': {
8bdd16b4 2593 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2594 'title': 'lex will - Community',
2595 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2596 'uploader': 'lex will',
2597 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2598 },
2599 'playlist_mincount': 18,
87dadd45 2600 }, {
8bdd16b4 2601 # Channels tab
2602 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2603 'info_dict': {
8bdd16b4 2604 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2605 'title': 'lex will - Channels',
2606 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2607 'uploader': 'lex will',
2608 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2609 },
deaec5af 2610 'playlist_mincount': 12,
6b08cdf6 2611 }, {
a0566bbf 2612 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2613 'only_matching': True,
2614 }, {
a0566bbf 2615 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2616 'only_matching': True,
2617 }, {
a0566bbf 2618 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2619 'only_matching': True,
2620 }, {
2621 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2622 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2623 'info_dict': {
2624 'title': '29C3: Not my department',
2625 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2626 'uploader': 'Christiaan008',
2627 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2628 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2629 },
2630 'playlist_count': 96,
2631 }, {
2632 'note': 'Large playlist',
2633 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2634 'info_dict': {
8bdd16b4 2635 'title': 'Uploads from Cauchemar',
2636 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2637 'uploader': 'Cauchemar',
2638 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2639 },
8bdd16b4 2640 'playlist_mincount': 1123,
2641 }, {
2642 # even larger playlist, 8832 videos
2643 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2644 'only_matching': True,
4b7df0d3
JMF
2645 }, {
2646 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2647 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2648 'info_dict': {
acf757f4
PH
2649 'title': 'Uploads from Interstellar Movie',
2650 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2651 'uploader': 'Interstellar Movie',
8bdd16b4 2652 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2653 },
481cc733 2654 'playlist_mincount': 21,
358de58c 2655 }, {
2656 'note': 'Playlist with "show unavailable videos" button',
2657 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2658 'info_dict': {
2659 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2660 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2661 'uploader': 'Phim Siêu Nhân Nhật Bản',
2662 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2663 },
2664 'playlist_mincount': 1400,
2665 'expected_warnings': [
2666 'YouTube said: INFO - Unavailable videos are hidden',
2667 ]
5d342002 2668 }, {
2669 'note': 'Playlist with unavailable videos in a later page',
2670 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2671 'info_dict': {
2672 'title': 'Uploads from BlankTV',
2673 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2674 'uploader': 'BlankTV',
2675 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2676 },
2677 'playlist_mincount': 20000,
8bdd16b4 2678 }, {
2679 # https://github.com/ytdl-org/youtube-dl/issues/21844
2680 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2681 'info_dict': {
2682 'title': 'Data Analysis with Dr Mike Pound',
2683 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2684 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2685 'uploader': 'Computerphile',
deaec5af 2686 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2687 },
2688 'playlist_mincount': 11,
2689 }, {
a0566bbf 2690 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2691 'only_matching': True,
dacb3a86
S
2692 }, {
2693 # Playlist URL that does not actually serve a playlist
2694 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2695 'info_dict': {
2696 'id': 'FqZTN594JQw',
2697 'ext': 'webm',
2698 'title': "Smiley's People 01 detective, Adventure Series, Action",
2699 'uploader': 'STREEM',
2700 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2701 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2702 'upload_date': '20150526',
2703 'license': 'Standard YouTube License',
2704 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2705 'categories': ['People & Blogs'],
2706 'tags': list,
dbdaaa23 2707 'view_count': int,
dacb3a86
S
2708 'like_count': int,
2709 'dislike_count': int,
2710 },
2711 'params': {
2712 'skip_download': True,
2713 },
13a75688 2714 'skip': 'This video is not available.',
dacb3a86 2715 'add_ie': [YoutubeIE.ie_key()],
481cc733 2716 }, {
8bdd16b4 2717 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2718 'only_matching': True,
66b48727 2719 }, {
8bdd16b4 2720 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2721 'only_matching': True,
a0566bbf 2722 }, {
2723 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2724 'info_dict': {
2725 'id': '9Auq9mYxFEE',
2726 'ext': 'mp4',
deaec5af 2727 'title': compat_str,
a0566bbf 2728 'uploader': 'Sky News',
2729 'uploader_id': 'skynews',
2730 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2731 'upload_date': '20191102',
deaec5af 2732 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2733 'categories': ['News & Politics'],
2734 'tags': list,
2735 'like_count': int,
2736 'dislike_count': int,
2737 },
2738 'params': {
2739 'skip_download': True,
2740 },
2741 }, {
2742 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2743 'info_dict': {
2744 'id': 'a48o2S1cPoo',
2745 'ext': 'mp4',
2746 'title': 'The Young Turks - Live Main Show',
2747 'uploader': 'The Young Turks',
2748 'uploader_id': 'TheYoungTurks',
2749 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2750 'upload_date': '20150715',
2751 'license': 'Standard YouTube License',
2752 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2753 'categories': ['News & Politics'],
2754 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2755 'like_count': int,
2756 'dislike_count': int,
2757 },
2758 'params': {
2759 'skip_download': True,
2760 },
2761 'only_matching': True,
2762 }, {
2763 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2764 'only_matching': True,
2765 }, {
2766 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2767 'only_matching': True,
3d3dddc9 2768 }, {
2769 'url': 'https://www.youtube.com/feed/trending',
2770 'only_matching': True,
2771 }, {
2772 # needs auth
2773 'url': 'https://www.youtube.com/feed/library',
2774 'only_matching': True,
2775 }, {
2776 # needs auth
2777 'url': 'https://www.youtube.com/feed/history',
2778 'only_matching': True,
2779 }, {
2780 # needs auth
2781 'url': 'https://www.youtube.com/feed/subscriptions',
2782 'only_matching': True,
2783 }, {
2784 # needs auth
2785 'url': 'https://www.youtube.com/feed/watch_later',
2786 'only_matching': True,
2787 }, {
2788 # no longer available?
2789 'url': 'https://www.youtube.com/feed/recommended',
2790 'only_matching': True,
29f7c58a 2791 }, {
2792 # inline playlist with not always working continuations
2793 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2794 'only_matching': True,
2795 }, {
2796 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2797 'only_matching': True,
2798 }, {
2799 'url': 'https://www.youtube.com/course',
2800 'only_matching': True,
2801 }, {
2802 'url': 'https://www.youtube.com/zsecurity',
2803 'only_matching': True,
2804 }, {
2805 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2806 'only_matching': True,
2807 }, {
2808 'url': 'https://www.youtube.com/TheYoungTurks/live',
2809 'only_matching': True,
39ed931e 2810 }, {
2811 'url': 'https://www.youtube.com/hashtag/cctv9',
2812 'info_dict': {
2813 'id': 'cctv9',
2814 'title': '#cctv9',
2815 },
2816 'playlist_mincount': 350,
201c1459 2817 }, {
2818 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2819 'only_matching': True,
29f7c58a 2820 }]
2821
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE accepts is rejected here; every other URL is
    judged by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2826
2827 def _extract_channel_id(self, webpage):
2828 channel_id = self._html_search_meta(
2829 'channelId', webpage, 'channel id', default=None)
2830 if channel_id:
2831 return channel_id
2832 channel_url = self._html_search_meta(
2833 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2834 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2835 'twitter:app:url:googleplay'), webpage, 'channel url')
2836 return self._search_regex(
2837 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2838 channel_url, 'channel id')
15f6397c 2839
8bdd16b4 2840 @staticmethod
cd7c66cf 2841 def _extract_basic_item_renderer(item):
2842 # Modified from _extract_grid_item_renderer
201c1459 2843 known_basic_renderers = (
2844 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 2845 )
2846 for key, renderer in item.items():
201c1459 2847 if not isinstance(renderer, dict):
cd7c66cf 2848 continue
201c1459 2849 elif key in known_basic_renderers:
2850 return renderer
2851 elif key.startswith('grid') and key.endswith('Renderer'):
2852 return renderer
8bdd16b4 2853
def _grid_entries(self, grid_renderer):
    """Yield one result per supported element of ``grid_renderer['items']``.

    Each item is reduced to its basic renderer and then handled, in this
    order of precedence, as a playlist (``playlistId``), a video
    (``videoId``), a channel (``channelId``) or, failing those, as a
    generic navigation endpoint URL that one of the YouTube extractors
    accepts.  Items matching none of these produce nothing.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']),
            compat_str)

        playlist_id = renderer.get('playlistId')
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel: only the simpleText form of the title is used here
            channel_title = try_get(
                renderer, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % renderer.get('channelId'),
                ie=YoutubeTabIE.ie_key(), video_title=channel_title)
        else:
            # generic endpoint URL support
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if not ep_url:
                continue
            # The first extractor that claims the URL wins
            for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if not ie.suitable(ep_url):
                    continue
                yield self.url_result(
                    ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url),
                    video_title=title)
                break
8bdd16b4 2896
3d3dddc9 2897 def _shelf_entries_from_content(self, shelf_renderer):
2898 content = shelf_renderer.get('content')
2899 if not isinstance(content, dict):
8bdd16b4 2900 return
cd7c66cf 2901 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 2902 if renderer:
2903 # TODO: add support for nested playlists so each shelf is processed
2904 # as separate playlist
2905 # TODO: this includes only first N items
2906 for entry in self._grid_entries(renderer):
2907 yield entry
2908 renderer = content.get('horizontalListRenderer')
2909 if renderer:
2910 # TODO
2911 pass
8bdd16b4 2912
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries of a shelf.

    When the shelf endpoint resolves to a URL, a result for that URL is
    yielded first, unless *skip_channels* is set and the URL contains
    ``/channels?``, in which case nothing is yielded at all.  The entries
    found in the shelf content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are skipped by inspecting the URL itself;
        # checking endpoint.commandMetadata.webCommandMetadata.webPageType
        # against WEB_PAGE_TYPE_CHANNEL will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 2930
8bdd16b4 2931 def _playlist_entries(self, video_list_renderer):
2932 for content in video_list_renderer['contents']:
2933 if not isinstance(content, dict):
2934 continue
2935 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2936 if not isinstance(renderer, dict):
2937 continue
2938 video_id = renderer.get('videoId')
2939 if not video_id:
2940 continue
2941 yield self._extract_video(renderer)
07aeced6 2942
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer`` of the given renderer.

    Nothing is yielded when that video renderer is missing or carries no
    ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
2950
8bdd16b4 2951 def _video_entry(self, video_renderer):
2952 video_id = video_renderer.get('videoId')
2953 if video_id:
2954 return self._extract_video(video_renderer)
dacb3a86 2955
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community post.

    The video attached to the post (``backstageAttachment.videoRenderer``)
    is yielded first, followed by every inline link in the post text that
    YoutubeIE accepts.  An inline link pointing at the attached video is
    skipped so that the same video is not reported twice.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
    video_id = None
    if video_renderer:
        # Remember the attachment's ID; without it the duplicate check
        # below compares against None and can never match.
        video_id = video_renderer.get('videoId')
        entry = self._video_entry(video_renderer)
        if entry:
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Report the ID of the linked video, not that of the attachment
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 2984
8bdd16b4 2985 def _post_thread_continuation_entries(self, post_thread_continuation):
2986 contents = post_thread_continuation.get('contents')
2987 if not isinstance(contents, list):
2988 return
2989 for content in contents:
2990 renderer = content.get('backstagePostThreadRenderer')
2991 if not isinstance(renderer, dict):
2992 continue
2993 for entry in self._post_thread_entries(renderer):
2994 yield entry
07aeced6 2995
39ed931e 2996 r''' # unused
2997 def _rich_grid_entries(self, contents):
2998 for content in contents:
2999 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3000 if video_renderer:
3001 entry = self._video_entry(video_renderer)
3002 if entry:
3003 yield entry
3004 '''
3005
29f7c58a 3006 @staticmethod
3007 def _build_continuation_query(continuation, ctp=None):
3008 query = {
3009 'ctoken': continuation,
3010 'continuation': continuation,
3011 }
3012 if ctp:
3013 query['itct'] = ctp
3014 return query
3015
@staticmethod
def _extract_next_continuation_data(renderer):
    """Build a continuation query from ``continuations[0].nextContinuationData``.

    Returns None when that entry is missing or has no ``continuation``
    token.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = next_data.get('continuation') if next_data else None
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 3027
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None if it has none.

    ``nextContinuationData`` is preferred.  Otherwise the ``contents`` and
    ``items`` lists of the renderer are scanned for the first
    ``continuationItemRenderer`` whose endpoint carries a
    ``continuationCommand`` token.
    """
    from_next_data = cls._extract_next_continuation_data(renderer)
    if from_next_data:
        return from_next_data

    candidates = []
    for list_key in ('contents', 'items'):
        candidates.extend(try_get(renderer, lambda x: x[list_key], list) or [])

    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate,
            lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 3050
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield all entries of *tab*, following continuations page by page.

    The entries embedded in the tab content (``sectionListRenderer`` or
    ``richGridRenderer``) are yielded first.  As long as a continuation is
    known, further pages are requested through ``_extract_response`` and
    their entries yielded; iteration stops when no continuation is left,
    when a request yields no response, or when a response contains no
    renderer this method knows how to handle.

    *item_id* is only used to label the page requests; *identity_token*,
    *account_syncid* and *ytcfg* are forwarded to the header/request
    helpers.
    """

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        # Besides yielding entries, this records the continuation it finds
        # in continuation_list[0] of the enclosing scope.
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list used as a mutable cell shared with extract_entries
    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {
            'continuation': continuation['continuation'],
        }
        # 'itct' is only set by _build_continuation_query when click
        # tracking params were available, so it must not be indexed
        # unconditionally (that raised KeyError for such continuations).
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response layout: entries live under 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset the cell so a stale continuation is never reused
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response layout: a list of appended continuation items.
        # Each handler is paired with the key under which it expects them.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item decides how the whole list is handled
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognisable in this response: stop paging
        break
9558dcec 3170
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` dict of the first tab marked as selected.

    Raises ExtractorError when no tab in ``tabs`` has a truthy boolean
    ``tabRenderer.selected`` flag.
    """
    def is_selected(candidate):
        return try_get(candidate, lambda x: x['tabRenderer']['selected'], bool)

    chosen = next((candidate for candidate in tabs if is_selected(candidate)), None)
    if chosen is None:
        raise ExtractorError('Unable to find selected tab')
    return chosen['tabRenderer']
b82f815f 3178
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar of ``data``.

    Scans the ``playlistSidebarSecondaryInfoRenderer`` sidebar items for the
    video owner's first title run and returns a dict holding whichever of
    ``uploader``, ``uploader_id`` and ``uploader_url`` could be determined.
    Keys whose value is None are left out of the result.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    found = {}
    for entry in sidebar_items:
        secondary_info = (
            entry.get('playlistSidebarSecondaryInfoRenderer')
            if isinstance(entry, dict) else None)
        if not isinstance(secondary_info, dict):
            continue
        owner_run = try_get(
            secondary_info,
            lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner_run:
            continue
        # A later matching sidebar item overrides values from an earlier one
        found['uploader'] = owner_run.get('text')
        found['uploader_id'] = try_get(
            owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        channel_path = try_get(
            owner_run,
            lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        found['uploader_url'] = urljoin('https://www.youtube.com/', channel_path)
    return {field: value for field, value in found.items() if value is not None}
8bdd16b4 3201
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build a playlist result for a browse page that exposes tabs.

    Metadata is read from ``channelMetadataRenderer`` when present, otherwise
    from ``playlistMetadataRenderer``. The entries come from the selected
    tab via ``self._entries``.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name, channel_url, channel_id = (
            metadata_renderer.get(key) for key in ('title', 'channelUrl', 'externalId'))
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []
    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        # Only set for channels; playlists fall back to item_id below
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()
        avatar_thumbnails = try_get(
            metadata_renderer, lambda x: x['avatar']['thumbnails'], list)
        raw_thumbnails = avatar_thumbnails or try_get(
            data,
            lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
            list) or []

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag_title = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag_title or playlist_id
    title = title + format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata: take the uploader from the playlist sidebar
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the final uploader_* values
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id,
        self._extract_identity_token(webpage, item_id),
        self._extract_account_syncid(data),
        self._extract_ytcfg(item_id, webpage))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3273
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages as needed.

    ``playlist`` is the playlist renderer of the current page. After its
    videos are yielded, the next page is requested from the ``next`` endpoint
    starting at the last video seen. Iteration stops when a page has no
    videos, has nothing after the last yielded video, contains the very
    first video again, or when no further playlist renderer is returned.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # try_get returns None when the endpoint is missing; fall back to an
        # empty dict so the per-field defaults below apply instead of crashing
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Nothing to iterate on the next pass; treat as the end of the mix
            return
cd7c66cf 3312
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a watch page that carries a playlist panel.

    If the playlist renderer links to a URL different from ``url``,
    extraction is delegated to YoutubeTabIE for that URL. Otherwise the
    playlist is extracted here as a mix.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    if not playlist_url or playlist_url == url:
        mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
        return self.playlist_result(
            mix_entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3330
f3eaa8dd
M
def _extract_alerts(self, data, expected=False):
    """Report the alerts found in ``data['alerts']``.

    Alerts whose type is ``error`` (case-insensitive) are collected as
    errors, all others as warnings. Every warning and every error except the
    last is reported via ``report_warning``; the last error, if any, is
    raised as ExtractorError with the given ``expected`` flag.
    """

    def _real_extract_alerts():
        """Yield one ``(alert_type, message)`` pair per alert with a type and text."""
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # Skip malformed alerts instead of failing on .get()
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
                for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                    # A run without string text contributes nothing
                    message += try_get(run, lambda x: x['text'], compat_str) or ''
                # Yield once per alert so the same message is never both
                # reported as a warning and raised as the error
                if message:
                    yield alert_type, message

    errors = []
    warnings = []
    for alert_type, alert_message in _real_extract_alerts():
        if alert_type.lower() == 'error':
            errors.append([alert_type, alert_message])
        else:
            warnings.append([alert_type, alert_message])

    for alert_type, alert_message in (warnings + errors[:-1]):
        self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if errors:
        raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3361
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns the API response from ``_extract_response`` (called with
    ``fatal=False``), or None when ``data`` has no playlist sidebar items.
    If the button is not found among the sidebar menu items, default
    ``params``/``browseId`` values are used for the request.
    """
    sidebar_renderer = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_renderer:
        return
    browse_id = params = None
    button_found = False
    for item in sidebar_renderer:
        if not isinstance(item, dict):
            continue
        renderer = item.get('playlistSidebarPrimaryInfoRenderer')
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            button_found = True
            break
        if button_found:
            # Stop scanning so a later sidebar item cannot overwrite the match
            break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    headers = self._generate_api_headers(
        # The sync id is taken from the page data, as at the other call
        # sites of _extract_account_syncid in this class
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3405
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint ``ep``, retrying on transient failures.

    A request is retried (up to the ``extractor_retries`` param, default 3)
    when it fails with HTTP 500/503/404 or when the response contains none
    of ``check_get_keys``. Once retries are exhausted, or on any other
    ExtractorError, the error is raised if ``fatal`` is true; otherwise a
    warning is reported and None is returned.
    """
    max_retries = self._downloader.params.get('extractor_retries', 3)
    required_keys = [] if check_get_keys is None else check_get_keys
    result = None
    failure = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        if failure:
            self.report_warning('%s. Retrying ...' % failure)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            result = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, retry_suffix))
        except ExtractorError as err:
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            is_transient = (
                isinstance(err.cause, compat_HTTPError)
                and err.cause.code in (500, 503, 404))
            if is_transient:
                failure = 'HTTP Error %s' % err.cause.code
                if attempt < max_retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(err))
            return

        # Youtube may send alerts if there was an issue with the continuation page
        self._extract_alerts(result, expected=False)
        if not required_keys or dict_get(result, required_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        failure = 'Incomplete data received'
        if attempt < max_retries:
            continue
        if fatal:
            raise ExtractorError(failure)
        self.report_warning(failure)
        return
    return result
3453
def _extract_webpage(self, url, item_id):
    """Download ``url`` and return ``(webpage, data)``.

    ``data`` is the initial data extracted from the page. The download is
    repeated (up to the ``extractor_retries`` param, default 3) while the
    data has neither ``contents`` nor ``currentVideoEndpoint``; after the
    last retry an ExtractorError is raised. Alerts in the data are processed
    with ``expected=True`` on every attempt.
    """
    max_retries = self._downloader.params.get('extractor_retries', 3)
    incomplete_msg = 'Incomplete yt initial data recieved'
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        note_suffix = ''
        if attempt:
            self.report_warning('%s. Retrying ...' % incomplete_msg)
            note_suffix = ' (retry #%d)' % attempt
        webpage = self._download_webpage(
            url, item_id, 'Downloading webpage%s' % note_suffix)
        data = self._extract_yt_initial_data(item_id, webpage)
        self._extract_alerts(data, expected=True)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        if attempt >= max_retries:
            raise ExtractorError(incomplete_msg)
    return webpage, data
3474
def _real_extract(self, url):
    """Extract a tab page, a playlist, or fall back to a single video.

    The URL is normalized to www.youtube.com, bare channel URLs are pointed
    at their ``/videos`` tab, and watch URLs without a video id are turned
    into playlist URLs. The downloaded page is then dispatched to tab
    extraction, watch-page playlist extraction, or YoutubeIE, in that order.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))

    # This is not matched in a channel page with a tab selected
    bare_match = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    url_parts = bare_match.groupdict() if bare_match else {}
    not_channel = url_parts.get('not_channel')
    if url_parts and not not_channel:
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (url_parts.get('pre'), url_parts.get('post') or '')

    # Handle both video/playlist URLs
    query_args = parse_qs(url)
    video_id = query_args.get('v', [None])[0]
    playlist_id = query_args.get('list', [None])[0]

    if not video_id and (not_channel or '').startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    reloaded = self._reload_with_unavailable_videos(item_id, data, webpage)
    data = reloaded or data

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    current_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str)
    video_id = current_video_id or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 3531
c5e8d7af 3532
class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist ids and ``list=`` URLs and hand them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a ``v`` video id."""
        if YoutubeTabIE.suitable(url) or parse_qs(url).get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the canonical ``/playlist`` URL, keeping the query string."""
        playlist_id = self._match_id(url)
        # A bare playlist id has no query string, so build one from the id
        query = parse_qs(url) or {'list': playlist_id}
        canonical_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            canonical_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3611
3612
class YoutubeYtBeIE(InfoExtractor):
    """Rewrite youtu.be short links that carry a playlist into watch URLs."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent ``/watch`` URL and delegate it to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3651
3652
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` keyword into a YouTube user URL."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the user's page to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3666
b05654f0 3667
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` shortcut to the ``LL`` playlist URL."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the ``LL`` playlist to YoutubeTabIE."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
3685
3686
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor backed by the ``search`` API endpoint."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` videos for ``query``, following continuation tokens.

        Paging stops when a response is empty, has no result sections, or
        carries no continuation token.
        """
        payload = {'query': query}
        if self._SEARCH_PARAMS:
            payload['params'] = self._SEARCH_PARAMS
        yielded = 0
        for page_num in itertools.count(1):
            page = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=payload,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not page:
                return
            # The first page and continuation pages keep the sections in different places
            sections = try_get(
                page,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list) or []
                for item in items:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            payload['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3756
c9ae7b95 3757
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that sends fixed search params."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' field of the search request
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3763
c9ae7b95 3764
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search taken from a ``youtube.com/results`` URL."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own ``_VALID_URL`` unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query and the ``sp`` filter from the URL and run the search."""
        url_query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = url_query.get('search_query') or url_query.get('q')
        query = search_terms[0]
        # Stored on the instance so that _entries sends it as 'params'
        self._SEARCH_PARAMS = url_query.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3790
3791
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name built from the subclass's ``_FEED_NAME``."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in during initialization."""
        self._login()

    def _real_extract(self, url):
        """Delegate the ``/feed/<_FEED_NAME>`` page to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3811
3812
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` shortcut to the ``WL`` playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the ``WL`` playlist to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3825
3826
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``; also matches the bare youtube.com URL."""

    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3841
1ed5b5c9 3842
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions`` (``:ytsubs`` shortcut)."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3854
1ed5b5c9 3855
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history`` (``:ythis`` shortcut)."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3864
3865
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their video id, e.g. through an unquoted ``&``.

    Matches watch URLs whose query is empty or holds a single one of the
    parameters listed in ``_VALID_URL``, as well as attribution links with a
    single ``a`` parameter. Extraction always fails with a hint on quoting.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The example commands name yt-dlp, the program this extractor ships with
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
3913
3914
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` id is shorter than 11 characters."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)