]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
Fix `--check-formats` when there is network error
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Parse the query component of *url* with compat_urlparse.parse_qs."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account endpoints. _login() uses _LOGIN_URL, _LOOKUP_URL,
    # _CHALLENGE_URL and _TFA_URL; _TWOFACTOR_URL is not referenced by the
    # methods of this class.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the "TL" value extracted from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of path names (not used by the methods of this class;
    # presumably consumed by subclasses - verify against callers)
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Regex fragment matching a playlist id
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _login(self):
        """
        Attempt to log in to YouTube.

        True is returned if successful or skipped (no username available).
        False is returned if any step of the login failed.

        If _LOGIN_REQUIRED is set and neither login info nor a cookie file
        was provided, an ExtractorError is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self._downloader.params.get('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Report the failure as False, as documented, instead of None
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the login form plus the JSON-encoded f_req to url and return the parsed JSON (False on failure)."""
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Strip everything that precedes the first '[' of the response
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self.report_warning(message)

        # The same URL is embedded in both the lookup and the challenge request
        continue_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1, continue_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, continue_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Report the failure as False, as documented, instead of None
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # The conditional must be parenthesised: '%' binds tighter than
            # 'if/else', which would otherwise drop the prefix for other errors
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _initialize_consent(self):
        """
        Make sure an accepting CONSENT cookie is set for .youtube.com.

        Nothing is done if a __Secure-3PSID cookie exists or the CONSENT
        cookie already contains 'YES'. Otherwise the id of a 'PENDING+<id>'
        CONSENT cookie is reused, falling back to a random 3-digit number.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Set up the consent cookie and, if a downloader is attached, attempt to log in."""
        self._initialize_consent()
        if self._downloader is None:
            return
        # The outcome of the login attempt does not change what happens next
        self._login()

    # Fallbacks used when the values cannot be taken from a page's ytcfg
    _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
    _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    # Regexes locating the JSON objects assigned to ytInitialData /
    # ytInitialPlayerResponse in a webpage
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # What is expected to follow the JSON object (see _extract_yt_initial_data)
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """
        Build an Authorization header value from the SAPISID cookie.

        Returns 'SAPISIDHASH <time>_<sha1>' where <sha1> is the hex digest of
        '<time> <SAPISID value> https://www.youtube.com', or None if there is
        no SAPISID cookie for https://www.youtube.com.
        """
        sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        hash_input = str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com"
        sapisidhash = hashlib.sha1(hash_input.encode("utf-8")).hexdigest()
        return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None):
        """
        POST a JSON request to https://www.youtube.com/youtubei/v1/<ep>.

        @param ep       Endpoint name appended to the API base URL
        @param query    Dict merged into the JSON body next to 'context'
        @param headers  Extra headers; they override the generated ones
        @param context  Client context; _extract_context() is used if falsy
        @param api_key  API key; _extract_api_key() is used if falsy
        The remaining arguments are passed on to _download_json, whose
        result is returned.
        """
        data = {'context': context or self._extract_context()}
        data.update(query)
        real_headers = self._generate_api_headers()
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def _extract_api_key(self, ytcfg=None):
        """Return INNERTUBE_API_KEY from ytcfg, or the built-in default key."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY

    def _extract_yt_initial_data(self, video_id, webpage):
        """Find the ytInitialData JSON object in webpage and return it parsed."""
        # Prefer a match that is followed by a known boundary; fall back to
        # the plain pattern
        patterns = (
            r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
            self._YT_INITIAL_DATA_RE)
        return self._parse_json(
            self._search_regex(patterns, webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return ID_TOKEN from the page's ytcfg, else from a regex over webpage (None if absent)."""
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(data):
        """
        Extract syncId required to download private playlists of secondary channels
        @param data Either response or ytcfg
        """
        sync_ids = (try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]
        # ytcfg includes channel_syncid if on secondary channel.
        # Guard against data being None/empty instead of raising AttributeError
        return data.get('DELEGATED_SESSION_ID') if data else None

    def _extract_ytcfg(self, video_id, webpage):
        """Return the object passed to ytcfg.set(...) in webpage as a dict ({} if missing or unparsable)."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def __extract_client_version(self, ytcfg):
        """Return INNERTUBE_CLIENT_VERSION from ytcfg, or the built-in default version."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION

    def _extract_context(self, ytcfg=None):
        """
        Return the client context for API requests.

        INNERTUBE_CONTEXT from ytcfg is returned if it is a non-empty dict;
        otherwise a minimal context is built from the client name, client
        version and (if present) VISITOR_DATA.
        """
        context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
        if context:
            return context

        # Recreate the client context (required)
        client_version = self.__extract_client_version(ytcfg)
        client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
        context = {
            'client': {
                'clientName': client_name,
                'clientVersion': client_version,
            }
        }
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
        """
        Build the HTTP headers for an API request.

        Client name and version are always set; the identity token, account
        sync id, visitor data and SAPISIDHASH authorization headers are only
        added when the corresponding value is available.
        """
        headers = {
            'X-YouTube-Client-Name': '1',
            'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
            headers['X-Goog-AuthUser'] = 0
        if visitor_data:
            headers['x-goog-visitor-id'] = visitor_data
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = 'https://www.youtube.com'
        return headers

    def _extract_video(self, renderer):
        """Convert a video renderer dict into a 'url' result handled by YoutubeIE."""
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Only the leading digits/commas are used, after removing whitespace
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
432
0c148415 433
360e1ca5 434class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 435 IE_DESC = 'YouTube.com'
bc2ca1bb 436 _INVIDIOUS_SITES = (
437 # invidious-redirect websites
438 r'(?:www\.)?redirect\.invidious\.io',
439 r'(?:(?:www|dev)\.)?invidio\.us',
440 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
441 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 442 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 443 r'(?:(?:www|au)\.)?ytprivate\.com',
444 r'(?:www\.)?invidious\.namazso\.eu',
445 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 446 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
447 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
448 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
449 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
450 # youtube-dl invidious instances list
451 r'(?:(?:www|no)\.)?invidiou\.sh',
452 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
453 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 454 r'(?:www\.)?invidious\.mastodon\.host',
455 r'(?:www\.)?invidious\.zapashcanon\.fr',
456 r'(?:www\.)?invidious\.kavin\.rocks',
201c1459 457 r'(?:www\.)?invidious\.tinfoil-hat\.net',
458 r'(?:www\.)?invidious\.himiko\.cloud',
459 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 460 r'(?:www\.)?invidious\.tube',
461 r'(?:www\.)?invidiou\.site',
462 r'(?:www\.)?invidious\.site',
463 r'(?:www\.)?invidious\.xyz',
464 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 465 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 466 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 467 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 468 r'(?:www\.)?tube\.poal\.co',
469 r'(?:www\.)?tube\.connect\.cafe',
470 r'(?:www\.)?vid\.wxzm\.sx',
471 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 472 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 473 r'(?:www\.)?yewtu\.be',
474 r'(?:www\.)?yt\.elukerio\.org',
475 r'(?:www\.)?yt\.lelux\.fi',
476 r'(?:www\.)?invidious\.ggc-project\.de',
477 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 478 r'(?:www\.)?ytprivate\.com',
479 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 480 r'(?:www\.)?invidious\.toot\.koeln',
481 r'(?:www\.)?invidious\.fdn\.fr',
482 r'(?:www\.)?watch\.nettohikari\.com',
483 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
484 r'(?:www\.)?qklhadlycap4cnod\.onion',
485 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
486 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
487 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
488 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
489 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
490 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
491 )
cb7dfeea 492 _VALID_URL = r"""(?x)^
c5e8d7af 493 (
edb53e2d 494 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 495 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
496 (?:www\.)?deturl\.com/www\.youtube\.com|
497 (?:www\.)?pwnyoutube\.com|
498 (?:www\.)?hooktube\.com|
499 (?:www\.)?yourepeat\.com|
500 tube\.majestyc\.net|
501 %(invidious)s|
502 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
503 (?:.*?\#/)? # handle anchor (#/) redirect urls
504 (?: # the various things that can precede the ID:
ac7553d0 505 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 506 |(?: # or the v= param in all its forms
f7000f3a 507 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 508 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 509 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
510 v=
511 )
f4b05232 512 ))
cbaed4bb
S
513 |(?:
514 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
515 vid\.plus| # or vid.plus/xxxx
516 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 517 %(invidious)s
cbaed4bb 518 )/
edb53e2d 519 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 520 )
c5e8d7af 521 )? # all until now is optional -> you can pass the naked ID
201c1459 522 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 523 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 524 $""" % {
bc2ca1bb 525 'invidious': '|'.join(_INVIDIOUS_SITES),
526 }
e40c758c 527 _PLAYER_INFO_RE = (
cc2db878 528 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
529 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 530 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 531 )
2c62dc26 532 _formats = {
c2d3cb4c 533 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
534 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
535 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
536 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
537 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
538 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
539 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
540 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 541 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 542 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
543 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
544 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
545 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
546 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
547 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 548 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 549 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
550 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 551
552
553 # 3D videos
c2d3cb4c 554 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
555 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
556 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
557 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 558 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
559 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
560 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 561
96fb5605 562 # Apple HTTP Live Streaming
11f12195 563 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 564 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
565 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
566 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
567 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
568 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 569 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
570 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
571
572 # DASH mp4 video
d23028a8
S
573 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
574 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
575 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
576 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
577 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 578 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
579 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
580 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
581 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
582 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
583 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
584 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 585
f6f1fc92 586 # Dash mp4 audio
d23028a8
S
587 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
588 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
589 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
590 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
591 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
592 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
593 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
594
595 # Dash webm
d23028a8
S
596 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
597 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
598 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
599 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
600 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
601 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
602 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
603 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
604 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
605 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
606 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
607 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
608 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
609 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
610 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 611 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
612 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
613 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
614 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
615 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
616 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
617 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
618
619 # Dash webm audio
d23028a8
S
620 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
621 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 622
0857baad 623 # Dash webm audio with opus inside
d23028a8
S
624 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
625 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
626 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 627
ce6b9a2d
PH
628 # RTMP (unnamed)
629 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
630
631 # av01 video only formats sometimes served with "unknown" codecs
632 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
633 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
634 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
635 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 636 }
29f7c58a 637 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 638
fd5c4aab
S
639 _GEO_BYPASS = False
640
78caa52a 641 IE_NAME = 'youtube'
2eb88d95
PH
642 _TESTS = [
643 {
2d3d2997 644 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
645 'info_dict': {
646 'id': 'BaW_jenozKc',
647 'ext': 'mp4',
3867038a 648 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
649 'uploader': 'Philipp Hagemeister',
650 'uploader_id': 'phihag',
ec85ded8 651 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
652 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
653 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 654 'upload_date': '20121002',
3867038a 655 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 656 'categories': ['Science & Technology'],
3867038a 657 'tags': ['youtube-dl'],
556dbe7f 658 'duration': 10,
dbdaaa23 659 'view_count': int,
3e7c1224
PH
660 'like_count': int,
661 'dislike_count': int,
7c80519c 662 'start_time': 1,
297a564b 663 'end_time': 9,
2eb88d95 664 }
0e853ca4 665 },
fccd3771 666 {
4bc3a23e
PH
667 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
668 'note': 'Embed-only video (#1746)',
669 'info_dict': {
670 'id': 'yZIXLfi8CZQ',
671 'ext': 'mp4',
672 'upload_date': '20120608',
673 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
674 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
675 'uploader': 'SET India',
94bfcd23 676 'uploader_id': 'setindia',
ec85ded8 677 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 678 'age_limit': 18,
545cc85d 679 },
680 'skip': 'Private video',
fccd3771 681 },
11b56058 682 {
8bdd16b4 683 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
684 'note': 'Use the first video ID in the URL',
685 'info_dict': {
686 'id': 'BaW_jenozKc',
687 'ext': 'mp4',
3867038a 688 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
689 'uploader': 'Philipp Hagemeister',
690 'uploader_id': 'phihag',
ec85ded8 691 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 692 'upload_date': '20121002',
3867038a 693 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 694 'categories': ['Science & Technology'],
3867038a 695 'tags': ['youtube-dl'],
556dbe7f 696 'duration': 10,
dbdaaa23 697 'view_count': int,
11b56058
PM
698 'like_count': int,
699 'dislike_count': int,
34a7de29
S
700 },
701 'params': {
702 'skip_download': True,
703 },
11b56058 704 },
dd27fd17 705 {
2d3d2997 706 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
707 'note': '256k DASH audio (format 141) via DASH manifest',
708 'info_dict': {
709 'id': 'a9LDPn-MO4I',
710 'ext': 'm4a',
711 'upload_date': '20121002',
712 'uploader_id': '8KVIDEO',
ec85ded8 713 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
714 'description': '',
715 'uploader': '8KVIDEO',
716 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 717 },
4bc3a23e
PH
718 'params': {
719 'youtube_include_dash_manifest': True,
720 'format': '141',
4919603f 721 },
de3c7fe0 722 'skip': 'format 141 not served anymore',
dd27fd17 723 },
8bdd16b4 724 # DASH manifest with encrypted signature
725 {
726 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
727 'info_dict': {
728 'id': 'IB3lcPjvWLA',
729 'ext': 'm4a',
730 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
731 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
732 'duration': 244,
733 'uploader': 'AfrojackVEVO',
734 'uploader_id': 'AfrojackVEVO',
735 'upload_date': '20131011',
cc2db878 736 'abr': 129.495,
8bdd16b4 737 },
738 'params': {
739 'youtube_include_dash_manifest': True,
740 'format': '141/bestaudio[ext=m4a]',
741 },
742 },
aa79ac0c
PH
743 # Controversy video
744 {
745 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
746 'info_dict': {
747 'id': 'T4XJQO3qol8',
748 'ext': 'mp4',
556dbe7f 749 'duration': 219,
aa79ac0c 750 'upload_date': '20100909',
4fe54c12 751 'uploader': 'Amazing Atheist',
aa79ac0c 752 'uploader_id': 'TheAmazingAtheist',
ec85ded8 753 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 754 'title': 'Burning Everyone\'s Koran',
545cc85d 755 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 756 }
c522adb1 757 },
dd2d55f1 758 # Normal age-gate video (embed allowed)
c522adb1 759 {
2d3d2997 760 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
761 'info_dict': {
762 'id': 'HtVdAasjOgU',
763 'ext': 'mp4',
764 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 765 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 766 'duration': 142,
c522adb1
JMF
767 'uploader': 'The Witcher',
768 'uploader_id': 'WitcherGame',
ec85ded8 769 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 770 'upload_date': '20140605',
34952f09 771 'age_limit': 18,
c522adb1
JMF
772 },
773 },
8bdd16b4 774 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
775 # YouTube Red ad is not captured for creator
776 {
777 'url': '__2ABJjxzNo',
778 'info_dict': {
779 'id': '__2ABJjxzNo',
780 'ext': 'mp4',
781 'duration': 266,
782 'upload_date': '20100430',
783 'uploader_id': 'deadmau5',
784 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 785 'creator': 'deadmau5',
786 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 787 'uploader': 'deadmau5',
788 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 789 'alt_title': 'Some Chords',
8bdd16b4 790 },
791 'expected_warnings': [
792 'DASH manifest missing',
793 ]
794 },
067aa17e 795 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
796 {
797 'url': 'lqQg6PlCWgI',
798 'info_dict': {
799 'id': 'lqQg6PlCWgI',
800 'ext': 'mp4',
556dbe7f 801 'duration': 6085,
90227264 802 'upload_date': '20150827',
cbe2bd91 803 'uploader_id': 'olympic',
ec85ded8 804 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 805 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 806 'uploader': 'Olympic',
cbe2bd91
PH
807 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
808 },
809 'params': {
810 'skip_download': 'requires avconv',
e52a40ab 811 }
cbe2bd91 812 },
6271f1ca
PH
813 # Non-square pixels
814 {
815 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
816 'info_dict': {
817 'id': '_b-2C3KPAM0',
818 'ext': 'mp4',
819 'stretched_ratio': 16 / 9.,
556dbe7f 820 'duration': 85,
6271f1ca
PH
821 'upload_date': '20110310',
822 'uploader_id': 'AllenMeow',
ec85ded8 823 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 824 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 825 'uploader': '孫ᄋᄅ',
6271f1ca
PH
826 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
827 },
06b491eb
S
828 },
829 # url_encoded_fmt_stream_map is empty string
830 {
831 'url': 'qEJwOuvDf7I',
832 'info_dict': {
833 'id': 'qEJwOuvDf7I',
f57b7835 834 'ext': 'webm',
06b491eb
S
835 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
836 'description': '',
837 'upload_date': '20150404',
838 'uploader_id': 'spbelect',
839 'uploader': 'Наблюдатели Петербурга',
840 },
841 'params': {
842 'skip_download': 'requires avconv',
e323cf3f
S
843 },
844 'skip': 'This live event has ended.',
06b491eb 845 },
067aa17e 846 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
847 {
848 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
849 'info_dict': {
850 'id': 'FIl7x6_3R5Y',
eb6793ba 851 'ext': 'webm',
da77d856
S
852 'title': 'md5:7b81415841e02ecd4313668cde88737a',
853 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 854 'duration': 220,
da77d856
S
855 'upload_date': '20150625',
856 'uploader_id': 'dorappi2000',
ec85ded8 857 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 858 'uploader': 'dorappi2000',
eb6793ba 859 'formats': 'mincount:31',
da77d856 860 },
eb6793ba 861 'skip': 'not actual anymore',
2ee8f5d8 862 },
8a1a26ce
YCH
863 # DASH manifest with segment_list
864 {
865 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
866 'md5': '8ce563a1d667b599d21064e982ab9e31',
867 'info_dict': {
868 'id': 'CsmdDsKjzN8',
869 'ext': 'mp4',
17ee98e1 870 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
871 'uploader': 'Airtek',
872 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
873 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
874 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
875 },
876 'params': {
877 'youtube_include_dash_manifest': True,
878 'format': '135', # bestvideo
be49068d
S
879 },
880 'skip': 'This live event has ended.',
2ee8f5d8 881 },
cf7e015f
S
882 {
883 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 884 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 885 'info_dict': {
545cc85d 886 'id': 'jvGDaLqkpTg',
887 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
888 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
889 },
890 'playlist': [{
891 'info_dict': {
545cc85d 892 'id': 'jvGDaLqkpTg',
cf7e015f 893 'ext': 'mp4',
545cc85d 894 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
895 'description': 'md5:e03b909557865076822aa169218d6a5d',
896 'duration': 10643,
897 'upload_date': '20161111',
898 'uploader': 'Team PGP',
899 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
900 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
901 },
902 }, {
903 'info_dict': {
545cc85d 904 'id': '3AKt1R1aDnw',
cf7e015f 905 'ext': 'mp4',
545cc85d 906 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
907 'description': 'md5:e03b909557865076822aa169218d6a5d',
908 'duration': 10991,
909 'upload_date': '20161111',
910 'uploader': 'Team PGP',
911 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
912 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
913 },
914 }, {
915 'info_dict': {
545cc85d 916 'id': 'RtAMM00gpVc',
cf7e015f 917 'ext': 'mp4',
545cc85d 918 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
919 'description': 'md5:e03b909557865076822aa169218d6a5d',
920 'duration': 10995,
921 'upload_date': '20161111',
922 'uploader': 'Team PGP',
923 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
924 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
925 },
926 }, {
927 'info_dict': {
545cc85d 928 'id': '6N2fdlP3C5U',
cf7e015f 929 'ext': 'mp4',
545cc85d 930 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
931 'description': 'md5:e03b909557865076822aa169218d6a5d',
932 'duration': 10990,
933 'upload_date': '20161111',
934 'uploader': 'Team PGP',
935 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
936 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
937 },
938 }],
939 'params': {
940 'skip_download': True,
941 },
cbaed4bb 942 },
f9f49d87 943 {
067aa17e 944 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
945 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
946 'info_dict': {
947 'id': 'gVfLd0zydlo',
948 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
949 },
950 'playlist_count': 2,
be49068d 951 'skip': 'Not multifeed anymore',
f9f49d87 952 },
cbaed4bb 953 {
2d3d2997 954 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 955 'only_matching': True,
0e49d9a6 956 },
6d4fc66b 957 {
2d3d2997 958 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
959 'only_matching': True,
960 },
0e49d9a6 961 {
067aa17e 962 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 963 # Also tests cut-off URL expansion in video description (see
067aa17e
S
964 # https://github.com/ytdl-org/youtube-dl/issues/1892,
965 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
966 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
967 'info_dict': {
968 'id': 'lsguqyKfVQg',
969 'ext': 'mp4',
970 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 971 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 972 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 973 'duration': 133,
0e49d9a6
LL
974 'upload_date': '20151119',
975 'uploader_id': 'IronSoulElf',
ec85ded8 976 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 977 'uploader': 'IronSoulElf',
eb6793ba
S
978 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
979 'track': 'Dark Walk - Position Music',
980 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 981 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
982 },
983 'params': {
984 'skip_download': True,
985 },
986 },
61f92af1 987 {
067aa17e 988 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
989 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
990 'only_matching': True,
991 },
313dfc45
LL
992 {
993 # Video with yt:stretch=17:0
994 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
995 'info_dict': {
996 'id': 'Q39EVAstoRM',
997 'ext': 'mp4',
998 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
999 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1000 'upload_date': '20151107',
1001 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1002 'uploader': 'CH GAMER DROID',
1003 },
1004 'params': {
1005 'skip_download': True,
1006 },
be49068d 1007 'skip': 'This video does not exist.',
313dfc45 1008 },
201c1459 1009 {
1010 # Video with incomplete 'yt:stretch=16:'
1011 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1012 'only_matching': True,
1013 },
7caf9830
S
1014 {
1015 # Video licensed under Creative Commons
1016 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1017 'info_dict': {
1018 'id': 'M4gD1WSo5mA',
1019 'ext': 'mp4',
1020 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1021 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1022 'duration': 721,
7caf9830
S
1023 'upload_date': '20150127',
1024 'uploader_id': 'BerkmanCenter',
ec85ded8 1025 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1026 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1027 'license': 'Creative Commons Attribution license (reuse allowed)',
1028 },
1029 'params': {
1030 'skip_download': True,
1031 },
1032 },
fd050249
S
1033 {
1034 # Channel-like uploader_url
1035 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1036 'info_dict': {
1037 'id': 'eQcmzGIKrzg',
1038 'ext': 'mp4',
1039 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1040 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1041 'duration': 4060,
fd050249 1042 'upload_date': '20151119',
eb6793ba 1043 'uploader': 'Bernie Sanders',
fd050249 1044 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1045 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1046 'license': 'Creative Commons Attribution license (reuse allowed)',
1047 },
1048 'params': {
1049 'skip_download': True,
1050 },
1051 },
040ac686
S
1052 {
1053 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1054 'only_matching': True,
7f29cf54
S
1055 },
1056 {
067aa17e 1057 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1058 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1059 'only_matching': True,
6496ccb4
S
1060 },
1061 {
1062 # Rental video preview
1063 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1064 'info_dict': {
1065 'id': 'uGpuVWrhIzE',
1066 'ext': 'mp4',
1067 'title': 'Piku - Trailer',
1068 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1069 'upload_date': '20150811',
1070 'uploader': 'FlixMatrix',
1071 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1072 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1073 'license': 'Standard YouTube License',
1074 },
1075 'params': {
1076 'skip_download': True,
1077 },
eb6793ba 1078 'skip': 'This video is not available.',
022a5d66 1079 },
12afdc2a
S
1080 {
1081 # YouTube Red video with episode data
1082 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1083 'info_dict': {
1084 'id': 'iqKdEhx-dD4',
1085 'ext': 'mp4',
1086 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1087 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1088 'duration': 2085,
12afdc2a
S
1089 'upload_date': '20170118',
1090 'uploader': 'Vsauce',
1091 'uploader_id': 'Vsauce',
1092 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1093 'series': 'Mind Field',
1094 'season_number': 1,
1095 'episode_number': 1,
1096 },
1097 'params': {
1098 'skip_download': True,
1099 },
1100 'expected_warnings': [
1101 'Skipping DASH manifest',
1102 ],
1103 },
c7121fa7
S
1104 {
1105 # The following content has been identified by the YouTube community
1106 # as inappropriate or offensive to some audiences.
1107 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1108 'info_dict': {
1109 'id': '6SJNVb0GnPI',
1110 'ext': 'mp4',
1111 'title': 'Race Differences in Intelligence',
1112 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1113 'duration': 965,
1114 'upload_date': '20140124',
1115 'uploader': 'New Century Foundation',
1116 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1117 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1118 },
1119 'params': {
1120 'skip_download': True,
1121 },
545cc85d 1122 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1123 },
022a5d66
S
1124 {
1125 # itag 212
1126 'url': '1t24XAntNCY',
1127 'only_matching': True,
fd5c4aab
S
1128 },
1129 {
1130 # geo restricted to JP
1131 'url': 'sJL6WA-aGkQ',
1132 'only_matching': True,
1133 },
cd5a74a2
S
1134 {
1135 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1136 'only_matching': True,
1137 },
bc2ca1bb 1138 {
1139 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1140 'only_matching': True,
1141 },
1142 {
1143 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1144 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1145 'only_matching': True,
1146 },
825cd268
RA
1147 {
1148 # DRM protected
1149 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1150 'only_matching': True,
4fe54c12
S
1151 },
1152 {
1153 # Video with unsupported adaptive stream type formats
1154 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1155 'info_dict': {
1156 'id': 'Z4Vy8R84T1U',
1157 'ext': 'mp4',
1158 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1159 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1160 'duration': 433,
1161 'upload_date': '20130923',
1162 'uploader': 'Amelia Putri Harwita',
1163 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1164 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1165 'formats': 'maxcount:10',
1166 },
1167 'params': {
1168 'skip_download': True,
1169 'youtube_include_dash_manifest': False,
1170 },
5429d6a9 1171 'skip': 'not actual anymore',
5caabd3c 1172 },
1173 {
822b9d9c 1174 # Youtube Music Auto-generated description
5caabd3c 1175 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1176 'info_dict': {
1177 'id': 'MgNrAu2pzNs',
1178 'ext': 'mp4',
1179 'title': 'Voyeur Girl',
1180 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1181 'upload_date': '20190312',
5429d6a9
S
1182 'uploader': 'Stephen - Topic',
1183 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1184 'artist': 'Stephen',
1185 'track': 'Voyeur Girl',
1186 'album': 'it\'s too much love to know my dear',
1187 'release_date': '20190313',
1188 'release_year': 2019,
1189 },
1190 'params': {
1191 'skip_download': True,
1192 },
1193 },
66b48727
RA
1194 {
1195 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1196 'only_matching': True,
1197 },
011e75e6
S
1198 {
1199 # invalid -> valid video id redirection
1200 'url': 'DJztXj2GPfl',
1201 'info_dict': {
1202 'id': 'DJztXj2GPfk',
1203 'ext': 'mp4',
1204 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1205 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1206 'upload_date': '20090125',
1207 'uploader': 'Prochorowka',
1208 'uploader_id': 'Prochorowka',
1209 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1210 'artist': 'Panjabi MC',
1211 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1212 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1213 },
1214 'params': {
1215 'skip_download': True,
1216 },
545cc85d 1217 'skip': 'Video unavailable',
ea74e00b
DP
1218 },
1219 {
1220 # empty description results in an empty string
1221 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1222 'info_dict': {
1223 'id': 'x41yOUIvK2k',
1224 'ext': 'mp4',
1225 'title': 'IMG 3456',
1226 'description': '',
1227 'upload_date': '20170613',
1228 'uploader_id': 'ElevageOrVert',
1229 'uploader': 'ElevageOrVert',
1230 },
1231 'params': {
1232 'skip_download': True,
1233 },
1234 },
a0566bbf 1235 {
29f7c58a 1236 # with '};' inside yt initial data (see [1])
1237 # see [2] for an example with '};' inside ytInitialPlayerResponse
1238 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1239 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1240 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1241 'info_dict': {
1242 'id': 'CHqg6qOn4no',
1243 'ext': 'mp4',
1244 'title': 'Part 77 Sort a list of simple types in c#',
1245 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1246 'upload_date': '20130831',
1247 'uploader_id': 'kudvenkat',
1248 'uploader': 'kudvenkat',
1249 },
1250 'params': {
1251 'skip_download': True,
1252 },
1253 },
29f7c58a 1254 {
1255 # another example of '};' in ytInitialData
1256 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1257 'only_matching': True,
1258 },
1259 {
1260 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1261 'only_matching': True,
1262 },
{
    # https://github.com/ytdl-org/youtube-dl/pull/28094
    'url': 'OtqTfy26tG0',
    'info_dict': {
        'id': 'OtqTfy26tG0',
        'ext': 'mp4',
        'title': 'Burn Out',
        'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
        'upload_date': '20141120',
        'uploader': 'The Cinematic Orchestra - Topic',
        'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
        'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
        'artist': 'The Cinematic Orchestra',
        'track': 'Burn Out',
        'album': 'Every Day',
        # This key was misspelled 'release_data'; the other music fixture in
        # this list spells the field 'release_date'.
        # NOTE(review): with the key corrected the expectation becomes a real
        # check that no release date is extracted - confirm against the video.
        'release_date': None,
        'release_year': None,
    },
    'params': {
        'skip_download': True,
    },
},
bc2ca1bb 1285 {
1286 # controversial video, only works with bpctr when authenticated with cookies
1287 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1288 'only_matching': True,
1289 },
f7ad7160 1290 {
1291 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1292 'url': 'cBvYw8_A0vQ',
1293 'info_dict': {
1294 'id': 'cBvYw8_A0vQ',
1295 'ext': 'mp4',
1296 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1297 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1298 'upload_date': '20201120',
1299 'uploader': 'Walk around Japan',
1300 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1301 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1302 },
1303 'params': {
1304 'skip_download': True,
1305 },
0fb983f6 1306 }, {
1307 # Has multiple audio streams
1308 'url': 'WaOKSUlf4TM',
1309 'only_matching': True
f7ad7160 1310 },
2eb88d95
PH
1311 ]
1312
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is decided by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs

    list_param = parse_qs(url).get('list', [None])[0]
    return False if list_param else super(YoutubeIE, cls).suitable(url)
1322
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the parent extractor, then create the two per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # (player URL, signature shape id) -> signature function,
    # filled in by _decrypt_signature
    self._player_cache = {}
    # player id -> downloaded player code,
    # filled in by _extract_signature_function
    self._code_cache = {}
e0df6211 1327
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Describe a signature by the lengths of its dot-separated parts.

    The lengths are rendered as strings and joined with dots again.
    """
    part_lengths = (len(piece) for piece in example_sig.split('.'))
    return '.'.join(compat_str(length) for length in part_lengths)
60064c53 1331
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern found in *player_url*.

    Raises ExtractorError when none of the patterns matches.
    """
    # Lazy generator: patterns after the first hit are never evaluated
    attempts = (
        re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_m = next((found for found in attempts if found), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c
S
1341
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms signatures shaped like *example_sig*.

    The downloader's 'youtube-sigfuncs' cache is consulted first. On a miss
    the player code is downloaded (at most once per player id on this
    instance), parsed with _parse_sig_js, and the character permutation the
    parsed function performs is stored in that cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    sig_shape = self._signature_cache_id(example_sig)
    func_id = 'js_%s_%s' % (player_id, sig_shape)
    assert os.path.basename(func_id) == func_id

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        # The cached spec lists, per output position, the input index to take
        return lambda s: ''.join(s[i] for i in cache_spec)

    if player_id in self._code_cache:
        code = self._code_cache[player_id]
    else:
        code = self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    res = self._parse_sig_js(code)

    # Run a string of pairwise distinct characters through the function; the
    # code points of the output then reveal which input index lands where.
    probe = ''.join(compat_chr(n) for n in range(len(example_sig)))
    permutation = [ord(ch) for ch in res(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, permutation)
    return res
1368
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function *func*.

    *func* is applied to a probe string of distinct characters as long as
    *example_sig*; the resulting index sequence is rendered as a sum of
    ``s[...]`` index/slice expressions and shown via ``to_screen``.
    """
    def gen_sig_code(idxs):
        # Yields 's[...]' snippets; runs of indices that change by a constant
        # +1 or -1 are collapsed into a single slice expression.
        def _genslice(start, end, step):
            # Render one slice; empty start / ':' end / omitted step are used
            # for the values 0, "runs off the front", and 1 respectively.
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # step is None while no run is in progress
        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # run continues
                    continue
                # run ended at prev: emit it and start over
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # a new run begins at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending for the final index
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Probe string: character k has code point k, so ord() of each output
    # character is the input index it came from
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    # Tuple of part lengths that guards the generated return statement
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1407
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JavaScript and wrap it.

    The function name is found with the first matching pattern below, the
    function is extracted with JSInterpreter, and a one-argument callable
    taking the signature string is returned.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # NOTE: identical to the pattern directly above (kept as-is)
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    sig_func_name = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    js_function = interpreter.extract_function(sig_func_name)
    # The extracted function takes its arguments as a list
    return lambda s: js_function([s])
1431
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    Raises ExtractorError when *player_url* is None, and wraps any exception
    raised while obtaining or running the signature function in an
    ExtractorError that carries the formatted traceback.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Make the player URL absolute
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif re.match(r'https?://', player_url) is None:
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        # One function per (player, signature shape) pair
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key in self._player_cache:
            decrypt = self._player_cache[cache_key]
        else:
            decrypt = self._player_cache[cache_key] = (
                self._extract_signature_function(video_id, player_url, s))
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decrypt, s)
        return decrypt(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1458
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in *player_response*.

    Returns without doing anything when
    ``playbackTracking.videostatsPlaybackUrl.baseUrl`` is missing or not a
    URL. The request itself is non-fatal (``fatal=False``).
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # randint() includes both end points: with an upper bound of 256 the
    # extra value (256 & 63 == 0) made the first character more likely than
    # the others, so the range is limited to the 256 values 0..255.
    cpn = ''.join(
        CPN_ALPHABET[random.randint(0, 255) & 63] for _ in range(16))

    # Add/override the 'ver' and 'cpn' query parameters
    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1483
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect references to YouTube videos embedded in *webpage*.

    Returns a list of strings gathered from three sources, in this order:
    player embed URLs, ``data-youtube-id`` values of lazyYT elements, and
    ``data-video_id`` values of "YouTube Video Importer" player divs.
    """
    # Embedded YouTube player
    # The embedSWF alternative used to read `embedSWF\(?:\s*`, which demands
    # a literal ':' and so never matched a plain `embedSWF("...")` call.
    # Making the colon optional keeps every old match and adds that form.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:?\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields one tuple per match; the id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
1515
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by YoutubeIE._extract_urls, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1520
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second capture group of cls._VALID_URL matched against url.

    The pattern is compiled in verbose mode. Raises ExtractorError when the
    URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1528
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a chapter list from the chaptered player bar in `data`.

    Each chapter starts at its own timeRangeStartMillis (converted to
    seconds) and ends where the next listed chapter starts; the last one
    ends at `duration`. Chapters whose start or end cannot be determined
    are skipped.

    Returns a list of {'start_time', 'end_time', 'title'} dicts, or None
    when `data` carries no chapter list.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_of(raw):
        millis = try_get(
            raw, lambda x: x['chapterRenderer']['timeRangeStartMillis'], int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for index, raw in enumerate(raw_chapters):
        begin = start_of(raw)
        if begin is None:
            continue
        following = index + 1
        if following < total:
            finish = start_of(raw_chapters[following])
        else:
            finish = duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                raw, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1568
545cc85d 1569 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1570 return self._parse_json(self._search_regex(
1571 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1572 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1573
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the result of datetime_from_str('now-<X><units>', precision='auto'),
    or None when time_text is empty/None or has fewer than three
    space-separated words.
    """
    # Callers may pass None when no time text could be extracted
    if not time_text:
        return None
    time_text_split = time_text.split(' ')
    if len(time_text_split) < 3:
        return None
    return datetime_from_str(
        'now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1583
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the 'text' strings of the dict entries in `runs`.

    Entries that are not dicts, or whose 'text' is missing, empty or not a
    string, are ignored. Returns None when nothing was collected.
    """
    pieces = []
    for run in runs:
        if isinstance(run, dict):
            piece = try_get(run, lambda x: x['text'], compat_str)
            if piece:
                pieces.append(piece)
    if not pieces:
        return None
    return ''.join(pieces)
1597
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a commentRenderer dict into a comment info dict.

    @param comment_renderer  dict holding the raw comment data
    @param parent            id of the parent comment; 'root' is stored in
                             the result when this is not given
    Returns None when the renderer has no 'commentId'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)

    # The published time may be missing or in a form parse_time_text does
    # not understand; leave the timestamp unset instead of crashing on None.
    time_text_dt = self.parse_time_text(time_text) if time_text else None
    timestamp = None
    if time_text_dt is not None:
        timestamp = calendar.timegm(time_text_dt.timetuple())

    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                  lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_liked,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
1630
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generator walking the comment continuation pages of a video.

    @param root_continuation_data  renderer the first continuation is read from
    @param identity_token, account_syncid, ytcfg
                                   passed on to the API header generation
    @param session_token_list      one-element list holding the xsrf token;
                                   updated in place whenever a response
                                   carries a new 'xsrf_token'
    @param parent                  id of the parent comment when downloading
                                   a reply thread, None for the root thread
    @param comment_counts          shared progress list
                                   [comments so far, est. total, thread #]

    Yields comment dicts (see _extract_comment). For the root thread it may
    additionally yield one int: the estimated total number of comments.
    Returns silently on HTTP 413; raises ExtractorError once the retries
    for a page are exhausted.
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type here; it was previously passed
            # inside the getter tuple and thus tried as a getter itself
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None

        # Retry loop for a single page: left via `break` once a response
        # with 'continuationContents' arrives, or via an exception/return
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                # Any other error, or no retries left
                raise
            else:
                # Keep the xsrf token fresh for the next request (shared
                # with recursive calls through the list object)
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        # Only the first known renderer is processed (every path below
        # ends in `break`)
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                    yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    # Restart from the sorted continuation; the comments of
                    # the current (unsorted) page are not extracted
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
1815
1816 def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
1817 """Entry for comment extraction"""
1818 comments = []
1819 known_entry_comment_renderers = (
1820 'itemSectionRenderer',
1821 )
1822 estimated_total = 0
1823 for entry in contents:
1824 for key, renderer in entry.items():
1825 if key not in known_entry_comment_renderers:
1826 continue
1827
1828 comment_iter = self._comment_entries(
1829 renderer,
1830 identity_token=self._extract_identity_token(webpage, item_id=video_id),
1831 account_syncid=self._extract_account_syncid(ytcfg),
f4f751af 1832 ytcfg=ytcfg,
a1c5d2ca
M
1833 session_token_list=[xsrf_token])
1834
1835 for comment in comment_iter:
1836 if isinstance(comment, int):
1837 estimated_total = comment
1838 continue
1839 comments.append(comment)
1840 break
d92f5d5a 1841 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
a1c5d2ca
M
1842 return {
1843 'comments': comments,
1844 'comment_count': len(comments),
1845 }
1846
c5e8d7af 1847 def _real_extract(self, url):
cf7e015f 1848 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1849 video_id = self._match_id(url)
1850 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1851 webpage_url = base_url + 'watch?v=' + video_id
1852 webpage = self._download_webpage(
cce889b9 1853 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1854
1855 player_response = None
1856 if webpage:
1857 player_response = self._extract_yt_initial_variable(
1858 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1859 video_id, 'initial player response')
f4f751af 1860
1861 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1862 if not player_response:
1863 player_response = self._call_api(
f4f751af 1864 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1865
1866 playability_status = player_response.get('playabilityStatus') or {}
1867 if playability_status.get('reason') == 'Sign in to confirm your age':
1868 pr = self._parse_json(try_get(compat_parse_qs(
1869 self._download_webpage(
1870 base_url + 'get_video_info', video_id,
1871 'Refetching age-gated info webpage',
1872 'unable to download video info webpage', query={
1873 'video_id': video_id,
7c60c33e 1874 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1875 }, fatal=False)),
1876 lambda x: x['player_response'][0],
1877 compat_str) or '{}', video_id)
1878 if pr:
1879 player_response = pr
1880
1881 trailer_video_id = try_get(
1882 playability_status,
1883 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1884 compat_str)
1885 if trailer_video_id:
1886 return self.url_result(
1887 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1888
545cc85d 1889 def get_text(x):
1890 if not x:
c2d125d9 1891 return
f7ad7160 1892 text = x.get('simpleText')
1893 if text and isinstance(text, compat_str):
1894 return text
1895 runs = x.get('runs')
1896 if not isinstance(runs, list):
1897 return
1898 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
15be3eb5 1899
545cc85d 1900 search_meta = (
1901 lambda x: self._html_search_meta(x, webpage, default=None)) \
1902 if webpage else lambda x: None
dbdaaa23 1903
545cc85d 1904 video_details = player_response.get('videoDetails') or {}
37357d21 1905 microformat = try_get(
545cc85d 1906 player_response,
1907 lambda x: x['microformat']['playerMicroformatRenderer'],
1908 dict) or {}
1909 video_title = video_details.get('title') \
1910 or get_text(microformat.get('title')) \
1911 or search_meta(['og:title', 'twitter:title', 'title'])
1912 video_description = video_details.get('shortDescription')
cf7e015f 1913
8fe10494 1914 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1915 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1916 multifeed_metadata_list = try_get(
1917 player_response,
1918 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1919 compat_str)
8fe10494
S
1920 if multifeed_metadata_list:
1921 entries = []
1922 feed_ids = []
1923 for feed in multifeed_metadata_list.split(','):
1924 # Unquote should take place before split on comma (,) since textual
1925 # fields may contain comma as well (see
067aa17e 1926 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1927 feed_data = compat_parse_qs(
1928 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1929
1930 def feed_entry(name):
545cc85d 1931 return try_get(
1932 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1933
1934 feed_id = feed_entry('id')
1935 if not feed_id:
1936 continue
1937 feed_title = feed_entry('title')
1938 title = video_title
1939 if feed_title:
1940 title += ' (%s)' % feed_title
8fe10494
S
1941 entries.append({
1942 '_type': 'url_transparent',
1943 'ie_key': 'Youtube',
1944 'url': smuggle_url(
545cc85d 1945 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1946 {'force_singlefeed': True}),
6b09401b 1947 'title': title,
8fe10494 1948 })
6b09401b 1949 feed_ids.append(feed_id)
8fe10494
S
1950 self.to_screen(
1951 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1952 % (', '.join(feed_ids), video_id))
545cc85d 1953 return self.playlist_result(
1954 entries, video_id, video_title, video_description)
8fe10494
S
1955 else:
1956 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1957
545cc85d 1958 formats = []
1959 itags = []
cc2db878 1960 itag_qualities = {}
545cc85d 1961 player_url = None
dca3ff4a 1962 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
545cc85d 1963 streaming_data = player_response.get('streamingData') or {}
1964 streaming_formats = streaming_data.get('formats') or []
1965 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1966 for fmt in streaming_formats:
1967 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1968 continue
321bf820 1969
cc2db878 1970 itag = str_or_none(fmt.get('itag'))
1971 quality = fmt.get('quality')
1972 if itag and quality:
1973 itag_qualities[itag] = quality
1974 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1975 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1976 # number of fragment that would subsequently requested with (`&sq=N`)
1977 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1978 continue
1979
545cc85d 1980 fmt_url = fmt.get('url')
1981 if not fmt_url:
1982 sc = compat_parse_qs(fmt.get('signatureCipher'))
1983 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1984 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1985 if not (sc and fmt_url and encrypted_sig):
1986 continue
1987 if not player_url:
1988 if not webpage:
1989 continue
1990 player_url = self._search_regex(
1991 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1992 webpage, 'player URL', fatal=False)
1993 if not player_url:
201e9eaa 1994 continue
545cc85d 1995 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
1996 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
1997 fmt_url += '&' + sp + '=' + signature
1998
545cc85d 1999 if itag:
2000 itags.append(itag)
cc2db878 2001 tbr = float_or_none(
2002 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
0fb983f6 2003 audio_track = fmt.get('audioTrack') or {}
545cc85d 2004 dct = {
2005 'asr': int_or_none(fmt.get('audioSampleRate')),
2006 'filesize': int_or_none(fmt.get('contentLength')),
2007 'format_id': itag,
0fb983f6 2008 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2009 'fps': int_or_none(fmt.get('fps')),
2010 'height': int_or_none(fmt.get('height')),
dca3ff4a 2011 'quality': q(quality),
cc2db878 2012 'tbr': tbr,
545cc85d 2013 'url': fmt_url,
2014 'width': fmt.get('width'),
0fb983f6 2015 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2016 }
2017 mimetype = fmt.get('mimeType')
2018 if mimetype:
2019 mobj = re.match(
2020 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2021 if mobj:
2022 dct['ext'] = mimetype2ext(mobj.group(1))
2023 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2024 no_audio = dct.get('acodec') == 'none'
2025 no_video = dct.get('vcodec') == 'none'
2026 if no_audio:
2027 dct['vbr'] = tbr
2028 if no_video:
2029 dct['abr'] = tbr
2030 if no_audio or no_video:
545cc85d 2031 dct['downloader_options'] = {
2032 # Youtube throttles chunks >~10M
2033 'http_chunk_size': 10485760,
bf1317d2 2034 }
7c60c33e 2035 if dct.get('ext'):
2036 dct['container'] = dct['ext'] + '_dash'
545cc85d 2037 formats.append(dct)
2038
2039 hls_manifest_url = streaming_data.get('hlsManifestUrl')
2040 if hls_manifest_url:
2041 for f in self._extract_m3u8_formats(
2042 hls_manifest_url, video_id, 'mp4', fatal=False):
2043 itag = self._search_regex(
2044 r'/itag/(\d+)', f['url'], 'itag', default=None)
2045 if itag:
2046 f['format_id'] = itag
2047 formats.append(f)
2048
1418a043 2049 if self._downloader.params.get('youtube_include_dash_manifest', True):
545cc85d 2050 dash_manifest_url = streaming_data.get('dashManifestUrl')
2051 if dash_manifest_url:
545cc85d 2052 for f in self._extract_mpd_formats(
2053 dash_manifest_url, video_id, fatal=False):
cc2db878 2054 itag = f['format_id']
2055 if itag in itags:
2056 continue
dca3ff4a 2057 if itag in itag_qualities:
2058 # Not actually usefull since the sorting is already done with "quality,res,fps,codec"
2059 # but kept to maintain feature parity (and code similarity) with youtube-dl
2060 # Remove if this causes any issues with sorting in future
2061 f['quality'] = q(itag_qualities[itag])
545cc85d 2062 filesize = int_or_none(self._search_regex(
2063 r'/clen/(\d+)', f.get('fragment_base_url')
2064 or f['url'], 'file size', default=None))
2065 if filesize:
2066 f['filesize'] = filesize
cc2db878 2067 formats.append(f)
bf1317d2 2068
545cc85d 2069 if not formats:
63ad4d43 2070 if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2071 self.raise_no_formats(
545cc85d 2072 'This video is DRM protected.', expected=True)
2073 pemr = try_get(
2074 playability_status,
2075 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2076 dict) or {}
2077 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2078 subreason = pemr.get('subreason')
2079 if subreason:
2080 subreason = clean_html(get_text(subreason))
2081 if subreason == 'The uploader has not made this video available in your country.':
2082 countries = microformat.get('availableCountries')
2083 if not countries:
2084 regions_allowed = search_meta('regionsAllowed')
2085 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2086 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2087 reason += '\n' + subreason
2088 if reason:
b7da73eb 2089 self.raise_no_formats(reason, expected=True)
bf1317d2 2090
545cc85d 2091 self._sort_formats(formats)
bf1317d2 2092
545cc85d 2093 keywords = video_details.get('keywords') or []
2094 if not keywords and webpage:
2095 keywords = [
2096 unescapeHTML(m.group('content'))
2097 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2098 for keyword in keywords:
2099 if keyword.startswith('yt:stretch='):
201c1459 2100 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2101 if mobj:
2102 # NB: float is intentional for forcing float division
2103 w, h = (float(v) for v in mobj.groups())
2104 if w > 0 and h > 0:
2105 ratio = w / h
2106 for f in formats:
2107 if f.get('vcodec') != 'none':
2108 f['stretched_ratio'] = ratio
2109 break
6449cd80 2110
545cc85d 2111 thumbnails = []
2112 for container in (video_details, microformat):
2113 for thumbnail in (try_get(
2114 container,
2115 lambda x: x['thumbnail']['thumbnails'], list) or []):
2116 thumbnail_url = thumbnail.get('url')
2117 if not thumbnail_url:
bf1317d2 2118 continue
1988fab7 2119 # Sometimes youtube gives a wrong thumbnail URL. See:
2120 # https://github.com/yt-dlp/yt-dlp/issues/233
2121 # https://github.com/ytdl-org/youtube-dl/issues/28023
2122 if 'maxresdefault' in thumbnail_url:
2123 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2124 thumbnails.append({
2125 'height': int_or_none(thumbnail.get('height')),
2126 'url': thumbnail_url,
2127 'width': int_or_none(thumbnail.get('width')),
2128 })
2129 if thumbnails:
2130 break
a6211d23 2131 else:
545cc85d 2132 thumbnail = search_meta(['og:image', 'twitter:image'])
2133 if thumbnail:
2134 thumbnails = [{'url': thumbnail}]
2135
2136 category = microformat.get('category') or search_meta('genre')
2137 channel_id = video_details.get('channelId') \
2138 or microformat.get('externalChannelId') \
2139 or search_meta('channelId')
2140 duration = int_or_none(
2141 video_details.get('lengthSeconds')
2142 or microformat.get('lengthSeconds')) \
2143 or parse_duration(search_meta('duration'))
2144 is_live = video_details.get('isLive')
2145 owner_profile_url = microformat.get('ownerProfileUrl')
2146
2147 info = {
2148 'id': video_id,
2149 'title': self._live_title(video_title) if is_live else video_title,
2150 'formats': formats,
2151 'thumbnails': thumbnails,
2152 'description': video_description,
2153 'upload_date': unified_strdate(
2154 microformat.get('uploadDate')
2155 or search_meta('uploadDate')),
2156 'uploader': video_details['author'],
2157 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2158 'uploader_url': owner_profile_url,
2159 'channel_id': channel_id,
2160 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2161 'duration': duration,
2162 'view_count': int_or_none(
2163 video_details.get('viewCount')
2164 or microformat.get('viewCount')
2165 or search_meta('interactionCount')),
2166 'average_rating': float_or_none(video_details.get('averageRating')),
2167 'age_limit': 18 if (
2168 microformat.get('isFamilySafe') is False
2169 or search_meta('isFamilyFriendly') == 'false'
2170 or search_meta('og:restrictions:age') == '18+') else 0,
2171 'webpage_url': webpage_url,
2172 'categories': [category] if category else None,
2173 'tags': keywords,
2174 'is_live': is_live,
2175 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2176 'was_live': video_details.get('isLiveContent'),
545cc85d 2177 }
b477fc13 2178
545cc85d 2179 pctr = try_get(
2180 player_response,
2181 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2182 subtitles = {}
2183 if pctr:
2184 def process_language(container, base_url, lang_code, query):
2185 lang_subs = []
2186 for fmt in self._SUBTITLE_FORMATS:
2187 query.update({
2188 'fmt': fmt,
2189 })
2190 lang_subs.append({
2191 'ext': fmt,
2192 'url': update_url_query(base_url, query),
2193 })
2194 container[lang_code] = lang_subs
7e72694b 2195
545cc85d 2196 for caption_track in (pctr.get('captionTracks') or []):
2197 base_url = caption_track.get('baseUrl')
2198 if not base_url:
2199 continue
2200 if caption_track.get('kind') != 'asr':
2201 lang_code = caption_track.get('languageCode')
2202 if not lang_code:
2203 continue
2204 process_language(
2205 subtitles, base_url, lang_code, {})
2206 continue
2207 automatic_captions = {}
2208 for translation_language in (pctr.get('translationLanguages') or []):
2209 translation_language_code = translation_language.get('languageCode')
2210 if not translation_language_code:
2211 continue
2212 process_language(
2213 automatic_captions, base_url, translation_language_code,
2214 {'tlang': translation_language_code})
2215 info['automatic_captions'] = automatic_captions
2216 info['subtitles'] = subtitles
7e72694b 2217
545cc85d 2218 parsed_url = compat_urllib_parse_urlparse(url)
2219 for component in [parsed_url.fragment, parsed_url.query]:
2220 query = compat_parse_qs(component)
2221 for k, v in query.items():
2222 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2223 d_k += '_time'
2224 if d_k not in info and k in s_ks:
2225 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2226
2227 # Youtube Music Auto-generated description
822b9d9c 2228 if video_description:
38d70284 2229 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2230 if mobj:
822b9d9c
RA
2231 release_year = mobj.group('release_year')
2232 release_date = mobj.group('release_date')
2233 if release_date:
2234 release_date = release_date.replace('-', '')
2235 if not release_year:
545cc85d 2236 release_year = release_date[:4]
2237 info.update({
2238 'album': mobj.group('album'.strip()),
2239 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2240 'track': mobj.group('track').strip(),
2241 'release_date': release_date,
cc2db878 2242 'release_year': int_or_none(release_year),
545cc85d 2243 })
7e72694b 2244
545cc85d 2245 initial_data = None
2246 if webpage:
2247 initial_data = self._extract_yt_initial_variable(
2248 webpage, self._YT_INITIAL_DATA_RE, video_id,
2249 'yt initial data')
2250 if not initial_data:
2251 initial_data = self._call_api(
f4f751af 2252 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2253
2254 if not is_live:
2255 try:
2256 # This will error if there is no livechat
2257 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2258 info['subtitles']['live_chat'] = [{
394dcd44 2259 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2260 'video_id': video_id,
2261 'ext': 'json',
2262 'protocol': 'youtube_live_chat_replay',
2263 }]
2264 except (KeyError, IndexError, TypeError):
2265 pass
2266
2267 if initial_data:
2268 chapters = self._extract_chapters_from_json(
2269 initial_data, video_id, duration)
2270 if not chapters:
2271 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2272 contents = try_get(
2273 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2274 list)
2275 if not contents:
2276 continue
2277
2278 def chapter_time(mmlir):
2279 return parse_duration(
2280 get_text(mmlir.get('timeDescription')))
2281
2282 chapters = []
2283 for next_num, content in enumerate(contents, start=1):
2284 mmlir = content.get('macroMarkersListItemRenderer') or {}
2285 start_time = chapter_time(mmlir)
2286 end_time = chapter_time(try_get(
2287 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2288 if next_num < len(contents) else duration
2289 if start_time is None or end_time is None:
2290 continue
2291 chapters.append({
2292 'start_time': start_time,
2293 'end_time': end_time,
2294 'title': get_text(mmlir.get('title')),
2295 })
2296 if chapters:
2297 break
2298 if chapters:
2299 info['chapters'] = chapters
2300
2301 contents = try_get(
2302 initial_data,
2303 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2304 list) or []
2305 for content in contents:
2306 vpir = content.get('videoPrimaryInfoRenderer')
2307 if vpir:
2308 stl = vpir.get('superTitleLink')
2309 if stl:
2310 stl = get_text(stl)
2311 if try_get(
2312 vpir,
2313 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2314 info['location'] = stl
2315 else:
2316 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2317 if mobj:
2318 info.update({
2319 'series': mobj.group(1),
2320 'season_number': int(mobj.group(2)),
2321 'episode_number': int(mobj.group(3)),
2322 })
2323 for tlb in (try_get(
2324 vpir,
2325 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2326 list) or []):
2327 tbr = tlb.get('toggleButtonRenderer') or {}
2328 for getter, regex in [(
2329 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2330 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2331 lambda x: x['accessibility'],
2332 lambda x: x['accessibilityData']['accessibilityData'],
2333 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2334 label = (try_get(tbr, getter, dict) or {}).get('label')
2335 if label:
2336 mobj = re.match(regex, label)
2337 if mobj:
2338 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2339 break
2340 sbr_tooltip = try_get(
2341 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2342 if sbr_tooltip:
2343 like_count, dislike_count = sbr_tooltip.split(' / ')
2344 info.update({
2345 'like_count': str_to_int(like_count),
2346 'dislike_count': str_to_int(dislike_count),
2347 })
2348 vsir = content.get('videoSecondaryInfoRenderer')
2349 if vsir:
2350 info['channel'] = get_text(try_get(
2351 vsir,
2352 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2353 dict))
545cc85d 2354 rows = try_get(
2355 vsir,
2356 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2357 list) or []
2358 multiple_songs = False
2359 for row in rows:
2360 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2361 multiple_songs = True
2362 break
2363 for row in rows:
2364 mrr = row.get('metadataRowRenderer') or {}
2365 mrr_title = mrr.get('title')
2366 if not mrr_title:
2367 continue
2368 mrr_title = get_text(mrr['title'])
2369 mrr_contents_text = get_text(mrr['contents'][0])
2370 if mrr_title == 'License':
2371 info['license'] = mrr_contents_text
2372 elif not multiple_songs:
2373 if mrr_title == 'Album':
2374 info['album'] = mrr_contents_text
2375 elif mrr_title == 'Artist':
2376 info['artist'] = mrr_contents_text
2377 elif mrr_title == 'Song':
2378 info['track'] = mrr_contents_text
2379
2380 fallbacks = {
2381 'channel': 'uploader',
2382 'channel_id': 'uploader_id',
2383 'channel_url': 'uploader_url',
2384 }
2385 for to, frm in fallbacks.items():
2386 if not info.get(to):
2387 info[to] = info.get(frm)
2388
2389 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2390 v = info.get(s_k)
2391 if v:
2392 info[d_k] = v
b84071c0 2393
c224251a
M
2394 is_private = bool_or_none(video_details.get('isPrivate'))
2395 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2396 is_membersonly = None
b28f8d24 2397 is_premium = None
c224251a
M
2398 if initial_data and is_private is not None:
2399 is_membersonly = False
b28f8d24 2400 is_premium = False
c224251a
M
2401 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2402 for content in contents or []:
2403 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2404 for badge in badges or []:
2405 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2406 if label.lower() == 'members only':
2407 is_membersonly = True
2408 break
b28f8d24
M
2409 elif label.lower() == 'premium':
2410 is_premium = True
2411 break
2412 if is_membersonly or is_premium:
c224251a
M
2413 break
2414
2415 # TODO: Add this for playlists
2416 info['availability'] = self._availability(
2417 is_private=is_private,
b28f8d24 2418 needs_premium=is_premium,
c224251a
M
2419 needs_subscription=is_membersonly,
2420 needs_auth=info['age_limit'] >= 18,
2421 is_unlisted=None if is_private is None else is_unlisted)
2422
06167fbb 2423 # get xsrf for annotations or comments
2424 get_annotations = self._downloader.params.get('writeannotations', False)
2425 get_comments = self._downloader.params.get('getcomments', False)
2426 if get_annotations or get_comments:
29f7c58a 2427 xsrf_token = None
545cc85d 2428 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2429 if ytcfg:
2430 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2431 if not xsrf_token:
2432 xsrf_token = self._search_regex(
2433 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2434 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2435
2436 # annotations
06167fbb 2437 if get_annotations:
64b6a4e9
RA
2438 invideo_url = try_get(
2439 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2440 if xsrf_token and invideo_url:
29f7c58a 2441 xsrf_field_name = None
2442 if ytcfg:
2443 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2444 if not xsrf_field_name:
2445 xsrf_field_name = self._search_regex(
2446 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2447 webpage, 'xsrf field name',
29f7c58a 2448 group='xsrf_field_name', default='session_token')
8a784c74 2449 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2450 self._proto_relative_url(invideo_url),
2451 video_id, note='Downloading annotations',
2452 errnote='Unable to download video annotations', fatal=False,
2453 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2454
277d6ff5 2455 if get_comments:
a1c5d2ca 2456 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2457
545cc85d 2458 self.mark_watched(video_id, player_response)
d77ab8e2 2459
545cc85d 2460 return info
c5e8d7af 2461
5f6a1245 2462
8bdd16b4 2463class YoutubeTabIE(YoutubeBaseInfoExtractor):
2464 IE_DESC = 'YouTube.com tab'
70d5c17b 2465 _VALID_URL = r'''(?x)
2466 https?://
2467 (?:\w+\.)?
2468 (?:
2469 youtube(?:kids)?\.com|
2470 invidio\.us
2471 )/
2472 (?:
2473 (?:channel|c|user)/|
2474 (?P<not_channel>
9ba5705a 2475 feed/|hashtag/|
70d5c17b 2476 (?:playlist|watch)\?.*?\blist=
2477 )|
29f7c58a 2478 (?!(?:%s)\b) # Direct URLs
70d5c17b 2479 )
2480 (?P<id>[^/?\#&]+)
2481 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2482 IE_NAME = 'youtube:tab'
2483
81127aa5 2484 _TESTS = [{
8bdd16b4 2485 # playlists, multipage
2486 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2487 'playlist_mincount': 94,
2488 'info_dict': {
2489 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2490 'title': 'Игорь Клейнер - Playlists',
2491 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2492 'uploader': 'Игорь Клейнер',
2493 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2494 },
2495 }, {
2496 # playlists, multipage, different order
2497 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2498 'playlist_mincount': 94,
2499 'info_dict': {
2500 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2501 'title': 'Игорь Клейнер - Playlists',
2502 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2503 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2504 'uploader': 'Игорь Клейнер',
8bdd16b4 2505 },
201c1459 2506 }, {
2507 # playlists, series
2508 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2509 'playlist_mincount': 5,
2510 'info_dict': {
2511 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2512 'title': '3Blue1Brown - Playlists',
2513 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2514 },
8bdd16b4 2515 }, {
2516 # playlists, singlepage
2517 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2518 'playlist_mincount': 4,
2519 'info_dict': {
2520 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2521 'title': 'ThirstForScience - Playlists',
2522 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2523 'uploader': 'ThirstForScience',
2524 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2525 }
2526 }, {
2527 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2528 'only_matching': True,
2529 }, {
2530 # basic, single video playlist
0e30a7b9 2531 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2532 'info_dict': {
0e30a7b9 2533 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2534 'uploader': 'Sergey M.',
2535 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2536 'title': 'youtube-dl public playlist',
81127aa5 2537 },
0e30a7b9 2538 'playlist_count': 1,
9291475f 2539 }, {
8bdd16b4 2540 # empty playlist
0e30a7b9 2541 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2542 'info_dict': {
0e30a7b9 2543 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2544 'uploader': 'Sergey M.',
2545 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2546 'title': 'youtube-dl empty playlist',
9291475f
PH
2547 },
2548 'playlist_count': 0,
2549 }, {
8bdd16b4 2550 # Home tab
2551 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2552 'info_dict': {
8bdd16b4 2553 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2554 'title': 'lex will - Home',
2555 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2556 'uploader': 'lex will',
2557 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2558 },
8bdd16b4 2559 'playlist_mincount': 2,
9291475f 2560 }, {
8bdd16b4 2561 # Videos tab
2562 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2563 'info_dict': {
8bdd16b4 2564 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2565 'title': 'lex will - Videos',
2566 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2567 'uploader': 'lex will',
2568 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2569 },
8bdd16b4 2570 'playlist_mincount': 975,
9291475f 2571 }, {
8bdd16b4 2572 # Videos tab, sorted by popular
2573 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2574 'info_dict': {
8bdd16b4 2575 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2576 'title': 'lex will - Videos',
2577 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2578 'uploader': 'lex will',
2579 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2580 },
8bdd16b4 2581 'playlist_mincount': 199,
9291475f 2582 }, {
8bdd16b4 2583 # Playlists tab
2584 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2585 'info_dict': {
8bdd16b4 2586 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2587 'title': 'lex will - Playlists',
2588 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2589 'uploader': 'lex will',
2590 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2591 },
8bdd16b4 2592 'playlist_mincount': 17,
ac7553d0 2593 }, {
8bdd16b4 2594 # Community tab
2595 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2596 'info_dict': {
8bdd16b4 2597 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2598 'title': 'lex will - Community',
2599 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2600 'uploader': 'lex will',
2601 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2602 },
2603 'playlist_mincount': 18,
87dadd45 2604 }, {
8bdd16b4 2605 # Channels tab
2606 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2607 'info_dict': {
8bdd16b4 2608 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2609 'title': 'lex will - Channels',
2610 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2611 'uploader': 'lex will',
2612 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2613 },
deaec5af 2614 'playlist_mincount': 12,
6b08cdf6 2615 }, {
a0566bbf 2616 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2617 'only_matching': True,
2618 }, {
a0566bbf 2619 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2620 'only_matching': True,
2621 }, {
a0566bbf 2622 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2623 'only_matching': True,
2624 }, {
2625 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2626 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2627 'info_dict': {
2628 'title': '29C3: Not my department',
2629 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2630 'uploader': 'Christiaan008',
2631 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2632 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2633 },
2634 'playlist_count': 96,
2635 }, {
2636 'note': 'Large playlist',
2637 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2638 'info_dict': {
8bdd16b4 2639 'title': 'Uploads from Cauchemar',
2640 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2641 'uploader': 'Cauchemar',
2642 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2643 },
8bdd16b4 2644 'playlist_mincount': 1123,
2645 }, {
2646 # even larger playlist, 8832 videos
2647 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2648 'only_matching': True,
4b7df0d3
JMF
2649 }, {
2650 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2651 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2652 'info_dict': {
acf757f4
PH
2653 'title': 'Uploads from Interstellar Movie',
2654 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2655 'uploader': 'Interstellar Movie',
8bdd16b4 2656 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2657 },
481cc733 2658 'playlist_mincount': 21,
358de58c 2659 }, {
2660 'note': 'Playlist with "show unavailable videos" button',
2661 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2662 'info_dict': {
2663 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2664 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2665 'uploader': 'Phim Siêu Nhân Nhật Bản',
2666 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2667 },
2668 'playlist_mincount': 1400,
2669 'expected_warnings': [
2670 'YouTube said: INFO - Unavailable videos are hidden',
2671 ]
5d342002 2672 }, {
2673 'note': 'Playlist with unavailable videos in a later page',
2674 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2675 'info_dict': {
2676 'title': 'Uploads from BlankTV',
2677 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2678 'uploader': 'BlankTV',
2679 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2680 },
2681 'playlist_mincount': 20000,
8bdd16b4 2682 }, {
2683 # https://github.com/ytdl-org/youtube-dl/issues/21844
2684 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2685 'info_dict': {
2686 'title': 'Data Analysis with Dr Mike Pound',
2687 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2688 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2689 'uploader': 'Computerphile',
deaec5af 2690 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2691 },
2692 'playlist_mincount': 11,
2693 }, {
a0566bbf 2694 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2695 'only_matching': True,
dacb3a86
S
2696 }, {
2697 # Playlist URL that does not actually serve a playlist
2698 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2699 'info_dict': {
2700 'id': 'FqZTN594JQw',
2701 'ext': 'webm',
2702 'title': "Smiley's People 01 detective, Adventure Series, Action",
2703 'uploader': 'STREEM',
2704 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2705 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2706 'upload_date': '20150526',
2707 'license': 'Standard YouTube License',
2708 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2709 'categories': ['People & Blogs'],
2710 'tags': list,
dbdaaa23 2711 'view_count': int,
dacb3a86
S
2712 'like_count': int,
2713 'dislike_count': int,
2714 },
2715 'params': {
2716 'skip_download': True,
2717 },
13a75688 2718 'skip': 'This video is not available.',
dacb3a86 2719 'add_ie': [YoutubeIE.ie_key()],
481cc733 2720 }, {
8bdd16b4 2721 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2722 'only_matching': True,
66b48727 2723 }, {
8bdd16b4 2724 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2725 'only_matching': True,
a0566bbf 2726 }, {
2727 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2728 'info_dict': {
2729 'id': '9Auq9mYxFEE',
2730 'ext': 'mp4',
deaec5af 2731 'title': compat_str,
a0566bbf 2732 'uploader': 'Sky News',
2733 'uploader_id': 'skynews',
2734 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2735 'upload_date': '20191102',
deaec5af 2736 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2737 'categories': ['News & Politics'],
2738 'tags': list,
2739 'like_count': int,
2740 'dislike_count': int,
2741 },
2742 'params': {
2743 'skip_download': True,
2744 },
2745 }, {
2746 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2747 'info_dict': {
2748 'id': 'a48o2S1cPoo',
2749 'ext': 'mp4',
2750 'title': 'The Young Turks - Live Main Show',
2751 'uploader': 'The Young Turks',
2752 'uploader_id': 'TheYoungTurks',
2753 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2754 'upload_date': '20150715',
2755 'license': 'Standard YouTube License',
2756 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2757 'categories': ['News & Politics'],
2758 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2759 'like_count': int,
2760 'dislike_count': int,
2761 },
2762 'params': {
2763 'skip_download': True,
2764 },
2765 'only_matching': True,
2766 }, {
2767 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2768 'only_matching': True,
2769 }, {
2770 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2771 'only_matching': True,
3d3dddc9 2772 }, {
2773 'url': 'https://www.youtube.com/feed/trending',
2774 'only_matching': True,
2775 }, {
2776 # needs auth
2777 'url': 'https://www.youtube.com/feed/library',
2778 'only_matching': True,
2779 }, {
2780 # needs auth
2781 'url': 'https://www.youtube.com/feed/history',
2782 'only_matching': True,
2783 }, {
2784 # needs auth
2785 'url': 'https://www.youtube.com/feed/subscriptions',
2786 'only_matching': True,
2787 }, {
2788 # needs auth
2789 'url': 'https://www.youtube.com/feed/watch_later',
2790 'only_matching': True,
2791 }, {
2792 # no longer available?
2793 'url': 'https://www.youtube.com/feed/recommended',
2794 'only_matching': True,
29f7c58a 2795 }, {
2796 # inline playlist with not always working continuations
2797 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2798 'only_matching': True,
2799 }, {
2800 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2801 'only_matching': True,
2802 }, {
2803 'url': 'https://www.youtube.com/course',
2804 'only_matching': True,
2805 }, {
2806 'url': 'https://www.youtube.com/zsecurity',
2807 'only_matching': True,
2808 }, {
2809 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2810 'only_matching': True,
2811 }, {
2812 'url': 'https://www.youtube.com/TheYoungTurks/live',
2813 'only_matching': True,
39ed931e 2814 }, {
2815 'url': 'https://www.youtube.com/hashtag/cctv9',
2816 'info_dict': {
2817 'id': 'cctv9',
2818 'title': '#cctv9',
2819 },
2820 'playlist_mincount': 350,
201c1459 2821 }, {
2822 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2823 'only_matching': True,
29f7c58a 2824 }]
2825
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE accepts is rejected here; for every other
    URL the decision is delegated to the inherited suitable().
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2830
2831 def _extract_channel_id(self, webpage):
2832 channel_id = self._html_search_meta(
2833 'channelId', webpage, 'channel id', default=None)
2834 if channel_id:
2835 return channel_id
2836 channel_url = self._html_search_meta(
2837 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2838 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2839 'twitter:app:url:googleplay'), webpage, 'channel url')
2840 return self._search_regex(
2841 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2842 channel_url, 'channel id')
15f6397c 2843
8bdd16b4 2844 @staticmethod
cd7c66cf 2845 def _extract_basic_item_renderer(item):
2846 # Modified from _extract_grid_item_renderer
201c1459 2847 known_basic_renderers = (
2848 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 2849 )
2850 for key, renderer in item.items():
201c1459 2851 if not isinstance(renderer, dict):
cd7c66cf 2852 continue
201c1459 2853 elif key in known_basic_renderers:
2854 return renderer
2855 elif key.startswith('grid') and key.endswith('Renderer'):
2856 return renderer
8bdd16b4 2857
def _grid_entries(self, grid_renderer):
    """Yield one result per recognised item of grid_renderer['items'].

    Each item is resolved through _extract_basic_item_renderer() and then
    classified, in this order, by the id key it carries: playlist, video,
    channel.  Items with none of these are dispatched by their navigation
    endpoint URL to the first extractor that accepts it.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = try_get(
            renderer, (
                lambda x: x['title']['runs'][0]['text'],
                lambda x: x['title']['simpleText'],
            ), compat_str)

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            # video
            yield self._extract_video(renderer)
        elif channel_id:
            # channel: the title is read from the plain-text form only
            channel_title = try_get(
                renderer, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=channel_title)
        else:
            # generic endpoint URL support
            endpoint_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if not endpoint_url:
                continue
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE)
                 if ie.suitable(endpoint_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    endpoint_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(endpoint_url), video_title=title)
8bdd16b4 2900
3d3dddc9 2901 def _shelf_entries_from_content(self, shelf_renderer):
2902 content = shelf_renderer.get('content')
2903 if not isinstance(content, dict):
8bdd16b4 2904 return
cd7c66cf 2905 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 2906 if renderer:
2907 # TODO: add support for nested playlists so each shelf is processed
2908 # as separate playlist
2909 # TODO: this includes only first N items
2910 for entry in self._grid_entries(renderer):
2911 yield entry
2912 renderer = content.get('horizontalListRenderer')
2913 if renderer:
2914 # TODO
2915 pass
8bdd16b4 2916
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf.

    When the shelf exposes an endpoint URL, a URL result for it is
    yielded first (titled with the shelf title); entries extracted from
    the shelf content follow.  With skip_channels set, a shelf whose URL
    contains '/channels?' produces nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels; note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = '/channels?' in shelf_url
        if skip_channels and links_to_channels:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may not contain a shelf URL; fall back to extraction from content
    for shelf_entry in self._shelf_entries_from_content(shelf_renderer):
        yield shelf_entry
c5e8d7af 2934
8bdd16b4 2935 def _playlist_entries(self, video_list_renderer):
2936 for content in video_list_renderer['contents']:
2937 if not isinstance(content, dict):
2938 continue
2939 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2940 if not isinstance(renderer, dict):
2941 continue
2942 video_id = renderer.get('videoId')
2943 if not video_id:
2944 continue
2945 yield self._extract_video(renderer)
07aeced6 2946
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in content.videoRenderer, if it has a videoId."""
    video = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video.get('videoId'):
        yield self._extract_video(video)
2954
8bdd16b4 2955 def _video_entry(self, video_renderer):
2956 video_id = video_renderer.get('videoId')
2957 if video_id:
2958 return self._extract_video(video_renderer)
dacb3a86 2959
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by one community post.

    In order: the attached video (if any), the attached playlist (if
    any), then every YouTube video linked from the post text, except a
    link to the already attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        linked_video_id = YoutubeIE._match_id(link_url)
        if linked_video_id != video_id:
            yield self.url_result(
                link_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 2995
8bdd16b4 2996 def _post_thread_continuation_entries(self, post_thread_continuation):
2997 contents = post_thread_continuation.get('contents')
2998 if not isinstance(contents, list):
2999 return
3000 for content in contents:
3001 renderer = content.get('backstagePostThreadRenderer')
3002 if not isinstance(renderer, dict):
3003 continue
3004 for entry in self._post_thread_entries(renderer):
3005 yield entry
07aeced6 3006
39ed931e 3007 r''' # unused
3008 def _rich_grid_entries(self, contents):
3009 for content in contents:
3010 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3011 if video_renderer:
3012 entry = self._video_entry(video_renderer)
3013 if entry:
3014 yield entry
3015 '''
3016
29f7c58a 3017 @staticmethod
3018 def _build_continuation_query(continuation, ctp=None):
3019 query = {
3020 'ctoken': continuation,
3021 'continuation': continuation,
3022 }
3023 if ctp:
3024 query['itct'] = ctp
3025 return query
3026
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from continuations[0].nextContinuationData.

    Returns None when that structure, or its 'continuation' token, is
    missing or empty.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = data.get('continuation') if data else None
    if not token:
        return
    return YoutubeTabIE._build_continuation_query(
        token, data.get('clickTrackingParams'))
c5e8d7af 3038
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    The 'nextContinuationData' form is preferred.  Failing that, the
    renderer's 'contents' and 'items' lists are scanned for the first
    continuationItemRenderer whose endpoint carries a command token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    candidates = []
    for list_key in ('contents', 'items'):
        candidates += try_get(renderer, lambda x: x[list_key], list) or []

    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
448830ce 3061
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of *tab*, following continuations page by page.

    The entries embedded in the tab content are yielded first.  After
    that, continuation pages are requested through _extract_response()
    until no continuation is left, a request yields no response, or a
    response holds nothing this method knows how to handle.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Yields the entries below parent_renderer['contents'] and records
        # the continuation it finds in the shared continuation_list[0].
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list so that extract_entries can hand a value back
    # (Python 2 does not support nonlocal)
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # 'itct' is only present when the continuation carried click
        # tracking params, so it must not be indexed unconditionally
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response has none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response shape: 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh slot: extract_entries reads this variable when it runs
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: appended continuation items.  The handler
        # is picked from the first item; each one receives the whole item
        # list under the key it expects.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognisable in this response: stop paging
        break
9558dcec 3181
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` dict of the first tab flagged as selected.

    Raises ExtractorError when none of the entries in ``tabs`` carries a
    truthy boolean ``tabRenderer.selected``.
    """
    selected = next(
        (candidate for candidate in tabs
         if try_get(candidate, lambda x: x['tabRenderer']['selected'], bool)),
        None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected['tabRenderer']
b82f815f 3189
@staticmethod
def _extract_uploader(data):
    """Collect uploader name, id and URL from the playlist sidebar of ``data``.

    Returns a dict holding any of ``uploader``, ``uploader_id`` and
    ``uploader_url``; keys whose value is None are left out.
    """
    found = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or []:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info,
            lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        owner_id = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        owner_path = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        found['uploader'] = owner.get('text')
        found['uploader_id'] = owner_id
        found['uploader_url'] = urljoin('https://www.youtube.com/', owner_path)
    # Drop the fields that could not be determined
    return dict((key, value) for key, value in found.items() if value is not None)
8bdd16b4 3212
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a page that is organised in tabs.

    Metadata is read from ``data['metadata']``: the channelMetadataRenderer
    is preferred and the playlistMetadataRenderer is the fallback. The
    entries come from the selected tab via ``self._entries``.

    Raises ExtractorError (from ``_extract_selected_tab``) if no tab is
    marked as selected.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    # Two independent lists; sharing one object between both names would let
    # a later in-place change of one silently affect the other
    thumbnails_list = []
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # channel_id is still None here for playlist pages; item_id is
        # substituted further down in that case
        playlist_id = channel_id
        # 'keywords' may be present but null, so do not rely on the
        # default argument of dict.get alone
        tags = (renderer.get('keywords') or '').split()
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for t in thumbnails_list:
        if not isinstance(t, dict):
            continue
        thumbnail_url = url_or_none(t.get('url'))
        if not thumbnail_url:
            continue
        thumbnails.append({
            'url': thumbnail_url,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        })
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        # Pages without a metadata title: use the hashtag header text if any
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata: fall back to the owner shown in the sidebar
        metadata.update(self._extract_uploader(data))
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id,
            self._extract_identity_token(webpage, item_id),
            self._extract_account_syncid(data),
            self._extract_ytcfg(item_id, webpage)),
        **metadata)
73c4ac2c 3284
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a Mix playlist, requesting further pages as needed.

    ``playlist`` is the playlist dict of the current page. After its videos
    have been yielded, the 'next' endpoint is queried for the following
    page. Iteration stops when a page has no videos, when a page brings
    nothing after the last video already seen, when the first video shows
    up again, or when the response holds no playlist.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video of the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item is not guaranteed to carry a watchEndpoint; use an
        # empty dict so that the fallbacks below apply instead of crashing
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Nothing to iterate over on the next round
            return
cd7c66cf 3323
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a playlist embedded in a watch page.

    If the playlist's endpoint resolves to a URL different from ``url``,
    extraction is handed over to YoutubeTabIE for that URL. Otherwise the
    playlist is treated as a Mix and extracted here.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    target_url = urljoin(url, endpoint_path)
    if not target_url or target_url == url:
        mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
        return self.playlist_result(
            mix_entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        target_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3341
f3eaa8dd
M
def _extract_alerts(self, data, expected=False):
    """Report the alerts found in ``data['alerts']`` and raise on errors.

    Alerts whose type is 'error' (case-insensitive) are errors; all others
    are warnings. Every warning and every error except the last one is
    passed to ``report_warning``; the last error is raised as an
    ExtractorError with the given ``expected`` flag.
    """

    def _real_extract_alerts():
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # Ignore malformed entries instead of failing on .get()
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
                if not message:
                    # No simple text: assemble the message from its runs,
                    # skipping runs that carry no text
                    message = ''.join(
                        try_get(run, lambda x: x['text'], compat_str) or ''
                        for run in try_get(alert, lambda x: x['text']['runs'], list) or [])
                # Each alert is reported exactly once
                if message:
                    yield alert_type, message

    errors = []
    warnings = []
    for alert_type, alert_message in _real_extract_alerts():
        if alert_type.lower() == 'error':
            errors.append([alert_type, alert_message])
        else:
            warnings.append([alert_type, alert_message])

    for alert_type, alert_message in (warnings + errors[:-1]):
        self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if errors:
        raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3372
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again through the API, using the parameters of the
    'show unavailable videos' menu entry when the sidebar offers one.
    Returns None when ``data`` has no playlist sidebar items.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return
    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        primary_info = sidebar_item.get('playlistSidebarPrimaryInfoRenderer')
        menu_items = try_get(
            primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if not isinstance(menu_item, dict):
                continue
            nav_item = menu_item.get('menuNavigationItemRenderer')
            label = try_get(
                nav_item, lambda x: x['text']['simpleText'], compat_str)
            if label and label.lower() == 'show unavailable videos':
                endpoint = try_get(
                    nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
                browse_id = endpoint.get('browseId')
                params = endpoint.get('params')
                break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    account_syncid = self._extract_account_syncid(ytcfg)
    identity_token = self._extract_identity_token(webpage, item_id=item_id)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=account_syncid,
        identity_token=identity_token,
        visitor_data=visitor_data)
    # Fixed fallbacks are used when the menu entry was not found
    query = {
        'params': params if params else 'wgYCCAA=',
        'browseId': browse_id if browse_id else 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3416
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint ``ep`` and return its response, with retries.

    A request is repeated (up to the 'extractor_retries' parameter,
    default 3) when it fails with HTTP 500, 503 or 404, or when the
    response contains none of ``check_get_keys``. Alerts in the response
    are processed through ``_extract_alerts``.

    When ``fatal`` is true, failures raise ExtractorError. When it is
    false, a warning is reported and None is returned instead; this
    includes errors raised from alerts in the response.
    """
    response = None
    last_error = None
    count = -1
    retries = self._downloader.params.get('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if count < retries:
                    continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_alerts(response, expected=False)
            except ExtractorError as e:
                # Honour fatal=False for alert errors as well
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
3464
def _extract_webpage(self, url, item_id):
    """Download ``url`` and return ``(webpage, data)``.

    ``data`` is the initial data extracted from the page. The download is
    repeated (up to the 'extractor_retries' parameter, default 3) while
    the data has neither 'contents' nor 'currentVideoEndpoint'. Alerts in
    the data are processed with ``expected=True``.

    Raises ExtractorError if the data is still incomplete after the last
    retry.
    """
    retries = self._downloader.params.get('extractor_retries', 3)
    count = -1
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self._extract_yt_initial_data(item_id, webpage)
        self._extract_alerts(data, expected=True)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
3485
def _real_extract(self, url):
    """Extract a tab page, a playlist, or fall back to a single video.

    The URL is forced onto www.youtube.com, bare channel URLs are pointed
    at their /videos tab (unless disabled through compat_opts), and the
    downloaded page is then dispatched on the structure of its data.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self._downloader.params.get('compat_opts', [])

    # This is not matched in a channel page with a tab selected
    channel_match = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    url_parts = channel_match.groupdict() if channel_match else {}
    redirect_allowed = 'no-youtube-channel-redirect' not in compat_opts
    if url_parts and not url_parts.get('not_channel') and redirect_allowed:
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (url_parts.get('pre'), url_parts.get('post') or '')

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    is_watch_url = (url_parts.get('not_channel') or '').startswith('watch')
    if is_watch_url and not video_id:
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        reloaded = self._reload_with_unavailable_videos(item_id, data, webpage)
        if reloaded:
            data = reloaded

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Neither tabs nor a playlist: see whether the page at least names a video
    endpoint_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str)
    video_id = endpoint_video_id or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 3544
c5e8d7af 3545
class YoutubePlaylistIE(InfoExtractor):
    """Match playlist ids and playlist URLs and hand them over to YoutubeTabIE."""
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a ``v`` parameter."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        if video_ids[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # A bare playlist id has no query string; build one from the id
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3627
3628
class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be links that carry a playlist id into watch URLs for YoutubeTabIE."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3667
3668
class YoutubeYtUserIE(InfoExtractor):
    """Map the ``ytuser:<name>`` shorthand to the user's page on YouTube."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3682
b05654f0 3683
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` shorthand to the 'LL' playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3701
3702
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor for the "ytsearch" keyword, backed by the 'search' API endpoint."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` videos for ``query``, following continuation tokens."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        yielded = 0
        page_num = 0
        while True:
            page_num += 1
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                return
            # First page and continuation pages keep the sections in different places
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or []:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            data['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3772
c9ae7b95 3773
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE for the "ytsearchdate" keyword."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' field of the search request
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3779
c9ae7b95 3780
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search taken from a youtube.com/results URL."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        return cls._VALID_URL

    def _real_extract(self, url):
        query_string = compat_urllib_parse_urlparse(url).query
        qs = compat_parse_qs(query_string)
        search_terms = qs.get('search_query') or qs.get('q')
        query = search_terms[0]
        # The 'sp' URL parameter becomes the search params of this instance
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3806
3807
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    Every subclass has to provide the _FEED_NAME property.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        self._login()

    @property
    def IE_NAME(self):
        feed_name = self._FEED_NAME
        return 'youtube:%s' % feed_name

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3827
3828
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` shorthand to the 'WL' playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3841
3842
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'recommended'; matches ``:ytrec`` and the bare YouTube home URL."""
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3857
1ed5b5c9 3858
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'subscriptions'; matches the ``:ytsubs`` shorthand."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3870
1ed5b5c9 3871
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'history'; matches the ``:ythis`` shorthand."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3880
3881
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their video id and explain the likely cause.

    The pattern matches a watch URL with at most one of a few known query
    parameters (or an attribution link) and nothing after it. Extraction
    always fails with an expected ExtractorError carrying a hint about
    shell quoting.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The hint names the program of this code base (yt-dlp)
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
3929
3930
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is 1 to 10 characters long and report them as truncated."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)