]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
Deprecate support for python versions < 3.6
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Parse the query component of *url* with ``compat_urlparse.parse_qs``."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self._downloader.params.get('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Report failure explicitly, as promised by the docstring
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST f_req (JSON-encoded) together with the login form fields to url."""
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything that precedes the first '[' of the response
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self.report_warning(message)

        # Shared by the lookup and the challenge requests
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Report failure explicitly, as promised by the docstring
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Pick the reason first: '%' binds tighter than the conditional
            # expression, so formatting inline would drop the prefix.
            reason = 'Invalid password' if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg
            warn('Unable to login: %s' % reason)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        ' (Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence concern as for the password warning
                    reason = 'Invalid TFA code' if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg
                    warn('Unable to finish TFA: %s' % reason)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _initialize_consent(self):
        """
        Set a "YES" CONSENT cookie for .youtube.com.

        Nothing is done if a __Secure-3PSID cookie is present or if the
        existing CONSENT cookie already contains 'YES'. The id of a
        PENDING consent cookie is reused when available, otherwise a
        random three-digit id is used.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Initialize the consent cookie and, if a downloader is set, try to log in."""
        self._initialize_consent()
        if self._downloader is None:
            return
        # The result is deliberately ignored: nothing follows that depends on it
        self._login()

    _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
    _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """
        Build the "SAPISIDHASH <time>_<sha1>" authorization value.

        The SHA-1 is computed over "<time> <SAPISID cookie value> https://www.youtube.com".
        None is returned if there is no SAPISID cookie.
        """
        sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
        return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None):
        """
        POST a JSON request to the youtubei/v1 endpoint `ep` and return the parsed response.

        @param ep        Endpoint name appended to https://www.youtube.com/youtubei/v1/
        @param query     Dict merged into the request body next to 'context'
        @param video_id  Passed through to _download_json
        @param headers   Extra headers that override the generated ones
        @param context   Request context; self._extract_context() is used if falsy
        @param api_key   API key; self._extract_api_key() is used if falsy
        """
        data = {'context': context or self._extract_context()}
        data.update(query)
        real_headers = self._generate_api_headers()
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def _extract_api_key(self, ytcfg=None):
        """Return INNERTUBE_API_KEY from ytcfg, falling back to _YT_INNERTUBE_API_KEY."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY

    def _extract_yt_initial_data(self, video_id, webpage):
        """Find the ytInitialData JSON object in webpage and return it parsed."""
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """
        Return the ID_TOKEN from the page's ytcfg, or from a regex match on
        the raw webpage if ytcfg does not provide it (None if not found).
        """
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(data):
        """
        Extract syncId required to download private playlists of secondary channels
        @param data Either response or ytcfg
        """
        sync_ids = (try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]
        # ytcfg includes channel_syncid if on secondary channel.
        # Guard against a missing/non-dict `data` instead of raising AttributeError.
        if not isinstance(data, dict):
            return None
        return data.get('DELEGATED_SESSION_ID')

    def _extract_ytcfg(self, video_id, webpage):
        """Return the object passed to ytcfg.set(...) in webpage as a dict ({} if unavailable)."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def __extract_client_version(self, ytcfg):
        """Return INNERTUBE_CLIENT_VERSION from ytcfg, falling back to _YT_WEB_CLIENT_VERSION."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION

    def _extract_context(self, ytcfg=None):
        """
        Return the INNERTUBE_CONTEXT dict from ytcfg, or rebuild a minimal
        client context (name, version and, if known, visitor data).
        """
        context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
        if context:
            return context

        # Recreate the client context (required)
        client_version = self.__extract_client_version(ytcfg)
        client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
        context = {
            'client': {
                'clientName': client_name,
                'clientVersion': client_version,
            }
        }
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
        """
        Build the HTTP headers for API requests.

        Client name/version are always set; the identity token, account
        sync id and visitor data headers are only added when the matching
        argument is truthy, and the Authorization/X-Origin pair only when
        a SAPISIDHASH value could be generated.
        """
        headers = {
            'X-YouTube-Client-Name': '1',
            'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
            headers['X-Goog-AuthUser'] = 0
        if visitor_data:
            headers['x-goog-visitor-id'] = visitor_data
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = 'https://www.youtube.com'
        return headers

    def _extract_video(self, renderer):
        """Convert a video renderer dict into a 'url' result handled by YoutubeIE."""
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Only the leading digits/commas (after removing whitespace) are used
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
0c148415 433
360e1ca5 434class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 435 IE_DESC = 'YouTube.com'
bc2ca1bb 436 _INVIDIOUS_SITES = (
437 # invidious-redirect websites
438 r'(?:www\.)?redirect\.invidious\.io',
439 r'(?:(?:www|dev)\.)?invidio\.us',
440 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
441 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 442 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 443 r'(?:(?:www|au)\.)?ytprivate\.com',
444 r'(?:www\.)?invidious\.namazso\.eu',
445 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 446 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
447 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
448 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
449 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
450 # youtube-dl invidious instances list
451 r'(?:(?:www|no)\.)?invidiou\.sh',
452 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
453 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 454 r'(?:www\.)?invidious\.mastodon\.host',
455 r'(?:www\.)?invidious\.zapashcanon\.fr',
456 r'(?:www\.)?invidious\.kavin\.rocks',
201c1459 457 r'(?:www\.)?invidious\.tinfoil-hat\.net',
458 r'(?:www\.)?invidious\.himiko\.cloud',
459 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 460 r'(?:www\.)?invidious\.tube',
461 r'(?:www\.)?invidiou\.site',
462 r'(?:www\.)?invidious\.site',
463 r'(?:www\.)?invidious\.xyz',
464 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 465 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 466 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 467 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 468 r'(?:www\.)?tube\.poal\.co',
469 r'(?:www\.)?tube\.connect\.cafe',
470 r'(?:www\.)?vid\.wxzm\.sx',
471 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 472 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 473 r'(?:www\.)?yewtu\.be',
474 r'(?:www\.)?yt\.elukerio\.org',
475 r'(?:www\.)?yt\.lelux\.fi',
476 r'(?:www\.)?invidious\.ggc-project\.de',
477 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 478 r'(?:www\.)?ytprivate\.com',
479 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 480 r'(?:www\.)?invidious\.toot\.koeln',
481 r'(?:www\.)?invidious\.fdn\.fr',
482 r'(?:www\.)?watch\.nettohikari\.com',
483 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
484 r'(?:www\.)?qklhadlycap4cnod\.onion',
485 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
486 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
487 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
488 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
489 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
490 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
491 )
cb7dfeea 492 _VALID_URL = r"""(?x)^
c5e8d7af 493 (
edb53e2d 494 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 495 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
496 (?:www\.)?deturl\.com/www\.youtube\.com|
497 (?:www\.)?pwnyoutube\.com|
498 (?:www\.)?hooktube\.com|
499 (?:www\.)?yourepeat\.com|
500 tube\.majestyc\.net|
501 %(invidious)s|
502 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
503 (?:.*?\#/)? # handle anchor (#/) redirect urls
504 (?: # the various things that can precede the ID:
ac7553d0 505 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 506 |(?: # or the v= param in all its forms
f7000f3a 507 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 508 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 509 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
510 v=
511 )
f4b05232 512 ))
cbaed4bb
S
513 |(?:
514 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
515 vid\.plus| # or vid.plus/xxxx
516 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 517 %(invidious)s
cbaed4bb 518 )/
edb53e2d 519 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 520 )
c5e8d7af 521 )? # all until now is optional -> you can pass the naked ID
201c1459 522 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 523 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 524 $""" % {
bc2ca1bb 525 'invidious': '|'.join(_INVIDIOUS_SITES),
526 }
e40c758c 527 _PLAYER_INFO_RE = (
cc2db878 528 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
529 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 530 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 531 )
2c62dc26 532 _formats = {
c2d3cb4c 533 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
534 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
535 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
536 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
537 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
538 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
539 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
540 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 541 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 542 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
543 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
544 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
545 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
546 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
547 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 548 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 549 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
550 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 551
552
553 # 3D videos
c2d3cb4c 554 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
555 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
556 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
557 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 558 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
559 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
560 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 561
96fb5605 562 # Apple HTTP Live Streaming
11f12195 563 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 564 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
565 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
566 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
567 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
568 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 569 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
570 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
571
572 # DASH mp4 video
d23028a8
S
573 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
574 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
575 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
576 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
577 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 578 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
579 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
580 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
581 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
582 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
583 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
584 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 585
f6f1fc92 586 # Dash mp4 audio
d23028a8
S
587 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
588 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
589 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
590 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
591 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
592 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
593 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
594
595 # Dash webm
d23028a8
S
596 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
597 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
598 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
599 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
600 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
601 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
602 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
603 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
604 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
605 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
606 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
607 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
608 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
609 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
610 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 611 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
612 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
613 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
614 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
615 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
616 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
617 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
618
619 # Dash webm audio
d23028a8
S
620 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
621 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 622
0857baad 623 # Dash webm audio with opus inside
d23028a8
S
624 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
625 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
626 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 627
ce6b9a2d
PH
628 # RTMP (unnamed)
629 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
630
631 # av01 video only formats sometimes served with "unknown" codecs
632 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
633 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
634 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
635 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 636 }
29f7c58a 637 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 638
fd5c4aab
S
639 _GEO_BYPASS = False
640
78caa52a 641 IE_NAME = 'youtube'
2eb88d95
PH
642 _TESTS = [
643 {
2d3d2997 644 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
645 'info_dict': {
646 'id': 'BaW_jenozKc',
647 'ext': 'mp4',
3867038a 648 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
649 'uploader': 'Philipp Hagemeister',
650 'uploader_id': 'phihag',
ec85ded8 651 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
652 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
653 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 654 'upload_date': '20121002',
3867038a 655 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 656 'categories': ['Science & Technology'],
3867038a 657 'tags': ['youtube-dl'],
556dbe7f 658 'duration': 10,
dbdaaa23 659 'view_count': int,
3e7c1224
PH
660 'like_count': int,
661 'dislike_count': int,
7c80519c 662 'start_time': 1,
297a564b 663 'end_time': 9,
2eb88d95 664 }
0e853ca4 665 },
fccd3771 666 {
4bc3a23e
PH
667 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
668 'note': 'Embed-only video (#1746)',
669 'info_dict': {
670 'id': 'yZIXLfi8CZQ',
671 'ext': 'mp4',
672 'upload_date': '20120608',
673 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
674 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
675 'uploader': 'SET India',
94bfcd23 676 'uploader_id': 'setindia',
ec85ded8 677 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 678 'age_limit': 18,
545cc85d 679 },
680 'skip': 'Private video',
fccd3771 681 },
11b56058 682 {
8bdd16b4 683 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
684 'note': 'Use the first video ID in the URL',
685 'info_dict': {
686 'id': 'BaW_jenozKc',
687 'ext': 'mp4',
3867038a 688 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
689 'uploader': 'Philipp Hagemeister',
690 'uploader_id': 'phihag',
ec85ded8 691 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 692 'upload_date': '20121002',
3867038a 693 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 694 'categories': ['Science & Technology'],
3867038a 695 'tags': ['youtube-dl'],
556dbe7f 696 'duration': 10,
dbdaaa23 697 'view_count': int,
11b56058
PM
698 'like_count': int,
699 'dislike_count': int,
34a7de29
S
700 },
701 'params': {
702 'skip_download': True,
703 },
11b56058 704 },
dd27fd17 705 {
2d3d2997 706 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
707 'note': '256k DASH audio (format 141) via DASH manifest',
708 'info_dict': {
709 'id': 'a9LDPn-MO4I',
710 'ext': 'm4a',
711 'upload_date': '20121002',
712 'uploader_id': '8KVIDEO',
ec85ded8 713 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
714 'description': '',
715 'uploader': '8KVIDEO',
716 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 717 },
4bc3a23e
PH
718 'params': {
719 'youtube_include_dash_manifest': True,
720 'format': '141',
4919603f 721 },
de3c7fe0 722 'skip': 'format 141 not served anymore',
dd27fd17 723 },
8bdd16b4 724 # DASH manifest with encrypted signature
725 {
726 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
727 'info_dict': {
728 'id': 'IB3lcPjvWLA',
729 'ext': 'm4a',
730 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
731 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
732 'duration': 244,
733 'uploader': 'AfrojackVEVO',
734 'uploader_id': 'AfrojackVEVO',
735 'upload_date': '20131011',
cc2db878 736 'abr': 129.495,
8bdd16b4 737 },
738 'params': {
739 'youtube_include_dash_manifest': True,
740 'format': '141/bestaudio[ext=m4a]',
741 },
742 },
aa79ac0c
PH
743 # Controversy video
744 {
745 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
746 'info_dict': {
747 'id': 'T4XJQO3qol8',
748 'ext': 'mp4',
556dbe7f 749 'duration': 219,
aa79ac0c 750 'upload_date': '20100909',
4fe54c12 751 'uploader': 'Amazing Atheist',
aa79ac0c 752 'uploader_id': 'TheAmazingAtheist',
ec85ded8 753 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 754 'title': 'Burning Everyone\'s Koran',
545cc85d 755 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 756 }
c522adb1 757 },
dd2d55f1 758 # Normal age-gate video (embed allowed)
c522adb1 759 {
2d3d2997 760 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
761 'info_dict': {
762 'id': 'HtVdAasjOgU',
763 'ext': 'mp4',
764 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 765 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 766 'duration': 142,
c522adb1
JMF
767 'uploader': 'The Witcher',
768 'uploader_id': 'WitcherGame',
ec85ded8 769 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 770 'upload_date': '20140605',
34952f09 771 'age_limit': 18,
c522adb1
JMF
772 },
773 },
8bdd16b4 774 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
775 # YouTube Red ad is not captured for creator
776 {
777 'url': '__2ABJjxzNo',
778 'info_dict': {
779 'id': '__2ABJjxzNo',
780 'ext': 'mp4',
781 'duration': 266,
782 'upload_date': '20100430',
783 'uploader_id': 'deadmau5',
784 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 785 'creator': 'deadmau5',
786 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 787 'uploader': 'deadmau5',
788 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 789 'alt_title': 'Some Chords',
8bdd16b4 790 },
791 'expected_warnings': [
792 'DASH manifest missing',
793 ]
794 },
067aa17e 795 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
796 {
797 'url': 'lqQg6PlCWgI',
798 'info_dict': {
799 'id': 'lqQg6PlCWgI',
800 'ext': 'mp4',
556dbe7f 801 'duration': 6085,
90227264 802 'upload_date': '20150827',
cbe2bd91 803 'uploader_id': 'olympic',
ec85ded8 804 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 805 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 806 'uploader': 'Olympic',
cbe2bd91
PH
807 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
808 },
809 'params': {
810 'skip_download': 'requires avconv',
e52a40ab 811 }
cbe2bd91 812 },
6271f1ca
PH
813 # Non-square pixels
814 {
815 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
816 'info_dict': {
817 'id': '_b-2C3KPAM0',
818 'ext': 'mp4',
819 'stretched_ratio': 16 / 9.,
556dbe7f 820 'duration': 85,
6271f1ca
PH
821 'upload_date': '20110310',
822 'uploader_id': 'AllenMeow',
ec85ded8 823 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 824 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 825 'uploader': '孫ᄋᄅ',
6271f1ca
PH
826 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
827 },
06b491eb
S
828 },
829 # url_encoded_fmt_stream_map is empty string
830 {
831 'url': 'qEJwOuvDf7I',
832 'info_dict': {
833 'id': 'qEJwOuvDf7I',
f57b7835 834 'ext': 'webm',
06b491eb
S
835 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
836 'description': '',
837 'upload_date': '20150404',
838 'uploader_id': 'spbelect',
839 'uploader': 'Наблюдатели Петербурга',
840 },
841 'params': {
842 'skip_download': 'requires avconv',
e323cf3f
S
843 },
844 'skip': 'This live event has ended.',
06b491eb 845 },
067aa17e 846 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
847 {
848 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
849 'info_dict': {
850 'id': 'FIl7x6_3R5Y',
eb6793ba 851 'ext': 'webm',
da77d856
S
852 'title': 'md5:7b81415841e02ecd4313668cde88737a',
853 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 854 'duration': 220,
da77d856
S
855 'upload_date': '20150625',
856 'uploader_id': 'dorappi2000',
ec85ded8 857 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 858 'uploader': 'dorappi2000',
eb6793ba 859 'formats': 'mincount:31',
da77d856 860 },
eb6793ba 861 'skip': 'not actual anymore',
2ee8f5d8 862 },
8a1a26ce
YCH
863 # DASH manifest with segment_list
864 {
865 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
866 'md5': '8ce563a1d667b599d21064e982ab9e31',
867 'info_dict': {
868 'id': 'CsmdDsKjzN8',
869 'ext': 'mp4',
17ee98e1 870 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
871 'uploader': 'Airtek',
872 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
873 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
874 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
875 },
876 'params': {
877 'youtube_include_dash_manifest': True,
878 'format': '135', # bestvideo
be49068d
S
879 },
880 'skip': 'This live event has ended.',
2ee8f5d8 881 },
cf7e015f
S
882 {
883 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 884 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 885 'info_dict': {
545cc85d 886 'id': 'jvGDaLqkpTg',
887 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
888 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
889 },
890 'playlist': [{
891 'info_dict': {
545cc85d 892 'id': 'jvGDaLqkpTg',
cf7e015f 893 'ext': 'mp4',
545cc85d 894 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
895 'description': 'md5:e03b909557865076822aa169218d6a5d',
896 'duration': 10643,
897 'upload_date': '20161111',
898 'uploader': 'Team PGP',
899 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
900 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
901 },
902 }, {
903 'info_dict': {
545cc85d 904 'id': '3AKt1R1aDnw',
cf7e015f 905 'ext': 'mp4',
545cc85d 906 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
907 'description': 'md5:e03b909557865076822aa169218d6a5d',
908 'duration': 10991,
909 'upload_date': '20161111',
910 'uploader': 'Team PGP',
911 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
912 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
913 },
914 }, {
915 'info_dict': {
545cc85d 916 'id': 'RtAMM00gpVc',
cf7e015f 917 'ext': 'mp4',
545cc85d 918 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
919 'description': 'md5:e03b909557865076822aa169218d6a5d',
920 'duration': 10995,
921 'upload_date': '20161111',
922 'uploader': 'Team PGP',
923 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
924 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
925 },
926 }, {
927 'info_dict': {
545cc85d 928 'id': '6N2fdlP3C5U',
cf7e015f 929 'ext': 'mp4',
545cc85d 930 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
931 'description': 'md5:e03b909557865076822aa169218d6a5d',
932 'duration': 10990,
933 'upload_date': '20161111',
934 'uploader': 'Team PGP',
935 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
936 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
937 },
938 }],
939 'params': {
940 'skip_download': True,
941 },
cbaed4bb 942 },
f9f49d87 943 {
067aa17e 944 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
945 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
946 'info_dict': {
947 'id': 'gVfLd0zydlo',
948 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
949 },
950 'playlist_count': 2,
be49068d 951 'skip': 'Not multifeed anymore',
f9f49d87 952 },
cbaed4bb 953 {
2d3d2997 954 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 955 'only_matching': True,
0e49d9a6 956 },
6d4fc66b 957 {
2d3d2997 958 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
959 'only_matching': True,
960 },
0e49d9a6 961 {
067aa17e 962 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 963 # Also tests cut-off URL expansion in video description (see
067aa17e
S
964 # https://github.com/ytdl-org/youtube-dl/issues/1892,
965 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
966 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
967 'info_dict': {
968 'id': 'lsguqyKfVQg',
969 'ext': 'mp4',
970 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 971 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 972 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 973 'duration': 133,
0e49d9a6
LL
974 'upload_date': '20151119',
975 'uploader_id': 'IronSoulElf',
ec85ded8 976 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 977 'uploader': 'IronSoulElf',
eb6793ba
S
978 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
979 'track': 'Dark Walk - Position Music',
980 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 981 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
982 },
983 'params': {
984 'skip_download': True,
985 },
986 },
61f92af1 987 {
067aa17e 988 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
989 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
990 'only_matching': True,
991 },
313dfc45
LL
992 {
993 # Video with yt:stretch=17:0
994 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
995 'info_dict': {
996 'id': 'Q39EVAstoRM',
997 'ext': 'mp4',
998 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
999 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1000 'upload_date': '20151107',
1001 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1002 'uploader': 'CH GAMER DROID',
1003 },
1004 'params': {
1005 'skip_download': True,
1006 },
be49068d 1007 'skip': 'This video does not exist.',
313dfc45 1008 },
201c1459 1009 {
1010 # Video with incomplete 'yt:stretch=16:'
1011 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1012 'only_matching': True,
1013 },
7caf9830
S
1014 {
1015 # Video licensed under Creative Commons
1016 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1017 'info_dict': {
1018 'id': 'M4gD1WSo5mA',
1019 'ext': 'mp4',
1020 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1021 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1022 'duration': 721,
7caf9830
S
1023 'upload_date': '20150127',
1024 'uploader_id': 'BerkmanCenter',
ec85ded8 1025 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1026 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1027 'license': 'Creative Commons Attribution license (reuse allowed)',
1028 },
1029 'params': {
1030 'skip_download': True,
1031 },
1032 },
fd050249
S
1033 {
1034 # Channel-like uploader_url
1035 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1036 'info_dict': {
1037 'id': 'eQcmzGIKrzg',
1038 'ext': 'mp4',
1039 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1040 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1041 'duration': 4060,
fd050249 1042 'upload_date': '20151119',
eb6793ba 1043 'uploader': 'Bernie Sanders',
fd050249 1044 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1045 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1046 'license': 'Creative Commons Attribution license (reuse allowed)',
1047 },
1048 'params': {
1049 'skip_download': True,
1050 },
1051 },
040ac686
S
1052 {
1053 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1054 'only_matching': True,
7f29cf54
S
1055 },
1056 {
067aa17e 1057 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1058 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1059 'only_matching': True,
6496ccb4
S
1060 },
1061 {
1062 # Rental video preview
1063 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1064 'info_dict': {
1065 'id': 'uGpuVWrhIzE',
1066 'ext': 'mp4',
1067 'title': 'Piku - Trailer',
1068 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1069 'upload_date': '20150811',
1070 'uploader': 'FlixMatrix',
1071 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1072 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1073 'license': 'Standard YouTube License',
1074 },
1075 'params': {
1076 'skip_download': True,
1077 },
eb6793ba 1078 'skip': 'This video is not available.',
022a5d66 1079 },
12afdc2a
S
1080 {
1081 # YouTube Red video with episode data
1082 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1083 'info_dict': {
1084 'id': 'iqKdEhx-dD4',
1085 'ext': 'mp4',
1086 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1087 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1088 'duration': 2085,
12afdc2a
S
1089 'upload_date': '20170118',
1090 'uploader': 'Vsauce',
1091 'uploader_id': 'Vsauce',
1092 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1093 'series': 'Mind Field',
1094 'season_number': 1,
1095 'episode_number': 1,
1096 },
1097 'params': {
1098 'skip_download': True,
1099 },
1100 'expected_warnings': [
1101 'Skipping DASH manifest',
1102 ],
1103 },
c7121fa7
S
1104 {
1105 # The following content has been identified by the YouTube community
1106 # as inappropriate or offensive to some audiences.
1107 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1108 'info_dict': {
1109 'id': '6SJNVb0GnPI',
1110 'ext': 'mp4',
1111 'title': 'Race Differences in Intelligence',
1112 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1113 'duration': 965,
1114 'upload_date': '20140124',
1115 'uploader': 'New Century Foundation',
1116 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1117 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1118 },
1119 'params': {
1120 'skip_download': True,
1121 },
545cc85d 1122 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1123 },
022a5d66
S
1124 {
1125 # itag 212
1126 'url': '1t24XAntNCY',
1127 'only_matching': True,
fd5c4aab
S
1128 },
1129 {
1130 # geo restricted to JP
1131 'url': 'sJL6WA-aGkQ',
1132 'only_matching': True,
1133 },
cd5a74a2
S
1134 {
1135 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1136 'only_matching': True,
1137 },
bc2ca1bb 1138 {
1139 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1140 'only_matching': True,
1141 },
1142 {
1143 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1144 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1145 'only_matching': True,
1146 },
825cd268
RA
1147 {
1148 # DRM protected
1149 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1150 'only_matching': True,
4fe54c12
S
1151 },
1152 {
1153 # Video with unsupported adaptive stream type formats
1154 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1155 'info_dict': {
1156 'id': 'Z4Vy8R84T1U',
1157 'ext': 'mp4',
1158 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1159 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1160 'duration': 433,
1161 'upload_date': '20130923',
1162 'uploader': 'Amelia Putri Harwita',
1163 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1164 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1165 'formats': 'maxcount:10',
1166 },
1167 'params': {
1168 'skip_download': True,
1169 'youtube_include_dash_manifest': False,
1170 },
5429d6a9 1171 'skip': 'not actual anymore',
5caabd3c 1172 },
1173 {
822b9d9c 1174 # Youtube Music Auto-generated description
5caabd3c 1175 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1176 'info_dict': {
1177 'id': 'MgNrAu2pzNs',
1178 'ext': 'mp4',
1179 'title': 'Voyeur Girl',
1180 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1181 'upload_date': '20190312',
5429d6a9
S
1182 'uploader': 'Stephen - Topic',
1183 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1184 'artist': 'Stephen',
1185 'track': 'Voyeur Girl',
1186 'album': 'it\'s too much love to know my dear',
1187 'release_date': '20190313',
1188 'release_year': 2019,
1189 },
1190 'params': {
1191 'skip_download': True,
1192 },
1193 },
66b48727
RA
1194 {
1195 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1196 'only_matching': True,
1197 },
011e75e6
S
1198 {
1199 # invalid -> valid video id redirection
1200 'url': 'DJztXj2GPfl',
1201 'info_dict': {
1202 'id': 'DJztXj2GPfk',
1203 'ext': 'mp4',
1204 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1205 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1206 'upload_date': '20090125',
1207 'uploader': 'Prochorowka',
1208 'uploader_id': 'Prochorowka',
1209 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1210 'artist': 'Panjabi MC',
1211 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1212 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1213 },
1214 'params': {
1215 'skip_download': True,
1216 },
545cc85d 1217 'skip': 'Video unavailable',
ea74e00b
DP
1218 },
1219 {
1220 # empty description results in an empty string
1221 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1222 'info_dict': {
1223 'id': 'x41yOUIvK2k',
1224 'ext': 'mp4',
1225 'title': 'IMG 3456',
1226 'description': '',
1227 'upload_date': '20170613',
1228 'uploader_id': 'ElevageOrVert',
1229 'uploader': 'ElevageOrVert',
1230 },
1231 'params': {
1232 'skip_download': True,
1233 },
1234 },
a0566bbf 1235 {
29f7c58a 1236 # with '};' inside yt initial data (see [1])
1237 # see [2] for an example with '};' inside ytInitialPlayerResponse
1238 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1239 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1240 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1241 'info_dict': {
1242 'id': 'CHqg6qOn4no',
1243 'ext': 'mp4',
1244 'title': 'Part 77 Sort a list of simple types in c#',
1245 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1246 'upload_date': '20130831',
1247 'uploader_id': 'kudvenkat',
1248 'uploader': 'kudvenkat',
1249 },
1250 'params': {
1251 'skip_download': True,
1252 },
1253 },
29f7c58a 1254 {
1255 # another example of '};' in ytInitialData
1256 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1257 'only_matching': True,
1258 },
1259 {
1260 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1261 'only_matching': True,
1262 },
545cc85d 1263 {
cc2db878 1264 # https://github.com/ytdl-org/youtube-dl/pull/28094
1265 'url': 'OtqTfy26tG0',
1266 'info_dict': {
1267 'id': 'OtqTfy26tG0',
1268 'ext': 'mp4',
1269 'title': 'Burn Out',
1270 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1271 'upload_date': '20141120',
1272 'uploader': 'The Cinematic Orchestra - Topic',
1273 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1274 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1275 'artist': 'The Cinematic Orchestra',
1276 'track': 'Burn Out',
1277 'album': 'Every Day',
1278 'release_data': None,
1279 'release_year': None,
1280 },
1281 'params': {
1282 'skip_download': True,
1283 },
545cc85d 1284 },
bc2ca1bb 1285 {
1286 # controversial video, only works with bpctr when authenticated with cookies
1287 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1288 'only_matching': True,
1289 },
f7ad7160 1290 {
1291 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1292 'url': 'cBvYw8_A0vQ',
1293 'info_dict': {
1294 'id': 'cBvYw8_A0vQ',
1295 'ext': 'mp4',
1296 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1297 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1298 'upload_date': '20201120',
1299 'uploader': 'Walk around Japan',
1300 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1301 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1302 },
1303 'params': {
1304 'skip_download': True,
1305 },
1306 },
2eb88d95
PH
1307 ]
1308
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle url.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is judged by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1318
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor together with its two per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _player_cache: signature functions, filled by _decrypt_signature
    # _code_cache: downloaded player code, filled by
    #              _extract_signature_function
    self._player_cache, self._code_cache = {}, {}
e0df6211 1323
60064c53
PH
1324 def _signature_cache_id(self, example_sig):
1325 """ Return a string representation of a signature """
78caa52a 1326 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 1327
e40c758c
S
1328 @classmethod
1329 def _extract_player_info(cls, player_url):
1330 for player_re in cls._PLAYER_INFO_RE:
1331 id_m = re.search(player_re, player_url)
1332 if id_m:
1333 break
1334 else:
c081b35c 1335 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1336 return id_m.group('id')
e40c758c
S
1337
1338 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1339 player_id = self._extract_player_info(player_url)
e0df6211 1340
c4417ddb 1341 # Read from filesystem cache
545cc85d 1342 func_id = 'js_%s_%s' % (
1343 player_id, self._signature_cache_id(example_sig))
c4417ddb 1344 assert os.path.basename(func_id) == func_id
a0e07d31 1345
69ea8ca4 1346 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1347 if cache_spec is not None:
78caa52a 1348 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1349
545cc85d 1350 if player_id not in self._code_cache:
1351 self._code_cache[player_id] = self._download_webpage(
e0df6211 1352 player_url, video_id,
545cc85d 1353 note='Downloading player ' + player_id,
69ea8ca4 1354 errnote='Download of %s failed' % player_url)
545cc85d 1355 code = self._code_cache[player_id]
1356 res = self._parse_sig_js(code)
e0df6211 1357
785521bf
PH
1358 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1359 cache_res = res(test_string)
1360 cache_spec = [ord(c) for c in cache_res]
83799698 1361
69ea8ca4 1362 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1363 return res
1364
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function func.

    func is applied to a probe string whose i-th character has code point
    i, so the result spells out which input index ends up at each output
    position.  That index list is rendered as a sum of s[...] index/slice
    expressions, guarded by a check on the part lengths of the signature,
    and shown with self.to_screen().
    """
    def gen_sig_code(idxs):
        # Yields 's[...]' expressions that, concatenated in order,
        # reproduce the index sequence idxs.
        # NOTE(review): with fewer than two indexes the loop body never
        # runs and `i` is unbound at the final yield.
        def _genslice(start, end, step):
            # Render the run start..end (inclusive, stride step) as a slice
            starts = '' if start == 0 else str(start)
            # A stop below zero cannot be written literally; leave it open
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # step is None while no run of consecutive indexes is in progress
        step = None
        # Quell pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the stride holds
                if i - prev == step:
                    continue
                # Run broken: emit it; i becomes prev on the next iteration
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # Neighbouring indexes open a new ascending/descending run
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush whatever is pending once the pairs are exhausted
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Probe string: character i has code point i
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    # Tuple of the lengths of the dot-separated parts of the signature
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1403
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable wrapping the signature function found in jscode.

    The function name is located with the regexes below (tried in order,
    named group 'sig'); the function itself is then extracted with
    JSInterpreter.  The returned callable takes the signature string.
    """
    funcname_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # Same pattern as the previous entry (listed twice)
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(')
    funcname = self._search_regex(
        funcname_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
1427
545cc85d 1428 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1429 """Turn the encrypted s field into a working signature"""
6b37f0be 1430
c8bf86d5 1431 if player_url is None:
69ea8ca4 1432 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1433
69ea8ca4 1434 if player_url.startswith('//'):
78caa52a 1435 player_url = 'https:' + player_url
3c90cc8b
S
1436 elif not re.match(r'https?://', player_url):
1437 player_url = compat_urlparse.urljoin(
1438 'https://www.youtube.com', player_url)
c8bf86d5 1439 try:
62af3a0e 1440 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1441 if player_id not in self._player_cache:
1442 func = self._extract_signature_function(
60064c53 1443 video_id, player_url, s
c8bf86d5
PH
1444 )
1445 self._player_cache[player_id] = func
1446 func = self._player_cache[player_id]
1447 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 1448 self._print_sig_code(func, s)
c8bf86d5
PH
1449 return func(s)
1450 except Exception as e:
1451 tb = traceback.format_exc()
1452 raise ExtractorError(
78caa52a 1453 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1454
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL from player_response.

    Does nothing when the response carries no usable
    playbackTracking.videostatsPlaybackUrl.baseUrl.  The request itself is
    non-fatal.
    """
    tracking_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not tracking_url:
        return

    url_parts = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]

    new_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1479
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return every YouTube embed reference found in *webpage*, in order.

    Three sources are scanned: player embeds (iframe/embed/object/SWF style
    markup), lazyYT ``data-youtube-id`` attributes, and the Wordpress
    "YouTube Video Importer" plugin's ``data-video_id`` attributes.
    """
    # Embedded YouTube player
    # NOTE(review): `embedSWF\(?:\s*` parses as an optional "(" followed by a
    # literal ":"; it reads like a mistyped group - confirm before changing.
    player_embed_re = r'''(?x)
    (?:
        <iframe[^>]+?src=|
        data-video-url=|
        <embed[^>]+?src=|
        embedSWF\(?:\s*|
        <object[^>]+data=|
        new\s+SWFObject\(
    )
    (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
    \1'''
    found = [
        unescapeHTML(match.group('url'))
        for match in re.finditer(player_embed_re, webpage)]

    # lazyYT YouTube embed
    for lazy_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        found.append(unescapeHTML(lazy_id))

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    # findall yields one tuple per match; the video id is the last group
    found.extend(groups[-1] for groups in re.findall(importer_re, webpage))

    return found
1511
@staticmethod
def _extract_url(webpage):
    """Return the first entry from ``YoutubeIE._extract_urls``, or None if there is none."""
    candidates = YoutubeIE._extract_urls(webpage)
    if not candidates:
        return None
    return candidates[0]
1516
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return capture group 2 of ``cls._VALID_URL`` matched (verbose mode) against *url*.

    Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    return match.group(2)
1524
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build chapter dicts from the chaptered player bar in *data*.

    Returns None when no chapter list is present. Otherwise returns a list
    of ``{'start_time', 'end_time', 'title'}`` dicts; a chapter ends where
    the next one starts, and the last one ends at *duration*. Chapters
    whose start or end cannot be determined are left out.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_seconds(item):
        # timeRangeStartMillis is in milliseconds; scale converts to seconds
        millis = try_get(
            item,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for index, item in enumerate(raw_chapters):
        begin = start_seconds(item)
        if begin is None:
            continue
        following = index + 1
        if following < total:
            finish = start_seconds(raw_chapters[following])
        else:
            finish = duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                item, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1564
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search *webpage* for a JSON blob and parse it non-fatally.

    *regex* followed by ``_YT_INITIAL_BOUNDARY_RE`` is tried first, then
    *regex* on its own; ``'{}'`` is parsed when neither matches.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1569
d92f5d5a 1570 @staticmethod
1571 def parse_time_text(time_text):
1572 """
1573 Parse the comment time text
1574 time_text is in the format 'X units ago (edited)'
1575 """
1576 time_text_split = time_text.split(' ')
1577 if len(time_text_split) >= 3:
1578 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1579
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the non-empty string ``'text'`` values of the dicts in *runs*.

    Entries that are not dicts are ignored. Returns None when nothing was
    collected.
    """
    pieces = [
        try_get(run, lambda x: x['text'], compat_str)
        for run in runs if isinstance(run, dict)]
    # drop missing/empty pieces; an empty result is reported as None
    return ''.join(piece for piece in pieces if piece) or None
1593
def _extract_comment(self, comment_renderer, parent=None):
    """Convert one comment renderer dict into a comment info dict.

    Returns None when the renderer carries no ``commentId``. ``timestamp``
    is an estimate derived from the relative published-time text and is
    None when that text is missing or cannot be parsed. ``parent`` is
    reported as ``'root'`` when no parent id is given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)

    # time_text may be None and parse_time_text may return None for short
    # text; in either case leave the timestamp unknown instead of raising.
    timestamp = None
    published = self.parse_time_text(time_text) if time_text else None
    if published is not None:
        timestamp = calendar.timegm(published.timetuple())

    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                  lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_liked,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
1626
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generate comments (and nested replies) by following continuations.

    Yields comment dicts as produced by ``_extract_comment``. On the first
    top-level page it may additionally yield one int: the estimated total
    number of comments.

    session_token_list is a one-element list; its entry is replaced in
    place whenever a response carries a new ``xsrf_token``.
    parent is the id of the comment whose replies are being fetched, or
    None for the top-level thread list.
    comment_counts is a shared three-item list
    ``[comments so far, estimated total, current thread #]``; it is created
    here when not supplied and handed to recursive reply calls.

    Raises ExtractorError when a page keeps failing after the configured
    ``extractor_retries`` or the server reports an error.
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type, not a getter: keeping it inside
            # the getter tuple made try_get fall back to dict(<container>).
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None

        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, continuation_renderer in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % comment_counts[1])
                    yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    # restart paging from the re-sorted continuation
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
        else:
            # No known renderer in this page: `continuation` was not
            # advanced, so another round would request the same page again
            # without end. Stop paging instead.
            break
1811
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
    """Entry for comment extraction

    Drains ``_comment_entries`` for the first known renderer of each entry
    in *contents* and returns ``{'comments': [...], 'comment_count': N}``.
    Integers coming out of the generator are taken as the estimated total
    and only used for the final progress message.
    """
    known_entry_comment_renderers = (
        'itemSectionRenderer',
    )
    collected = []
    total_estimate = 0
    for section in contents:
        for renderer_name, renderer in section.items():
            if renderer_name not in known_entry_comment_renderers:
                continue

            identity_token = self._extract_identity_token(webpage, item_id=video_id)
            account_syncid = self._extract_account_syncid(ytcfg)
            entries = self._comment_entries(
                renderer,
                identity_token=identity_token,
                account_syncid=account_syncid,
                ytcfg=ytcfg,
                session_token_list=[xsrf_token])

            for item in entries:
                if not isinstance(item, int):
                    collected.append(item)
                else:
                    total_estimate = item
            # only the first known renderer of an entry is processed
            break

    downloaded = len(collected)
    self.to_screen('Downloaded %d/%d comments' % (downloaded, total_estimate))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
1842
c5e8d7af 1843 def _real_extract(self, url):
cf7e015f 1844 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1845 video_id = self._match_id(url)
1846 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1847 webpage_url = base_url + 'watch?v=' + video_id
1848 webpage = self._download_webpage(
cce889b9 1849 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1850
1851 player_response = None
1852 if webpage:
1853 player_response = self._extract_yt_initial_variable(
1854 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1855 video_id, 'initial player response')
f4f751af 1856
1857 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1858 if not player_response:
1859 player_response = self._call_api(
f4f751af 1860 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1861
1862 playability_status = player_response.get('playabilityStatus') or {}
1863 if playability_status.get('reason') == 'Sign in to confirm your age':
1864 pr = self._parse_json(try_get(compat_parse_qs(
1865 self._download_webpage(
1866 base_url + 'get_video_info', video_id,
1867 'Refetching age-gated info webpage',
1868 'unable to download video info webpage', query={
1869 'video_id': video_id,
7c60c33e 1870 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1871 }, fatal=False)),
1872 lambda x: x['player_response'][0],
1873 compat_str) or '{}', video_id)
1874 if pr:
1875 player_response = pr
1876
1877 trailer_video_id = try_get(
1878 playability_status,
1879 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1880 compat_str)
1881 if trailer_video_id:
1882 return self.url_result(
1883 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1884
545cc85d 1885 def get_text(x):
1886 if not x:
c2d125d9 1887 return
f7ad7160 1888 text = x.get('simpleText')
1889 if text and isinstance(text, compat_str):
1890 return text
1891 runs = x.get('runs')
1892 if not isinstance(runs, list):
1893 return
1894 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
15be3eb5 1895
545cc85d 1896 search_meta = (
1897 lambda x: self._html_search_meta(x, webpage, default=None)) \
1898 if webpage else lambda x: None
dbdaaa23 1899
545cc85d 1900 video_details = player_response.get('videoDetails') or {}
37357d21 1901 microformat = try_get(
545cc85d 1902 player_response,
1903 lambda x: x['microformat']['playerMicroformatRenderer'],
1904 dict) or {}
1905 video_title = video_details.get('title') \
1906 or get_text(microformat.get('title')) \
1907 or search_meta(['og:title', 'twitter:title', 'title'])
1908 video_description = video_details.get('shortDescription')
cf7e015f 1909
8fe10494 1910 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1911 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1912 multifeed_metadata_list = try_get(
1913 player_response,
1914 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1915 compat_str)
8fe10494
S
1916 if multifeed_metadata_list:
1917 entries = []
1918 feed_ids = []
1919 for feed in multifeed_metadata_list.split(','):
1920 # Unquote should take place before split on comma (,) since textual
1921 # fields may contain comma as well (see
067aa17e 1922 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1923 feed_data = compat_parse_qs(
1924 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1925
1926 def feed_entry(name):
545cc85d 1927 return try_get(
1928 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1929
1930 feed_id = feed_entry('id')
1931 if not feed_id:
1932 continue
1933 feed_title = feed_entry('title')
1934 title = video_title
1935 if feed_title:
1936 title += ' (%s)' % feed_title
8fe10494
S
1937 entries.append({
1938 '_type': 'url_transparent',
1939 'ie_key': 'Youtube',
1940 'url': smuggle_url(
545cc85d 1941 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1942 {'force_singlefeed': True}),
6b09401b 1943 'title': title,
8fe10494 1944 })
6b09401b 1945 feed_ids.append(feed_id)
8fe10494
S
1946 self.to_screen(
1947 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1948 % (', '.join(feed_ids), video_id))
545cc85d 1949 return self.playlist_result(
1950 entries, video_id, video_title, video_description)
8fe10494
S
1951 else:
1952 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1953
545cc85d 1954 formats = []
1955 itags = []
cc2db878 1956 itag_qualities = {}
545cc85d 1957 player_url = None
dca3ff4a 1958 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
545cc85d 1959 streaming_data = player_response.get('streamingData') or {}
1960 streaming_formats = streaming_data.get('formats') or []
1961 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1962 for fmt in streaming_formats:
1963 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1964 continue
321bf820 1965
cc2db878 1966 itag = str_or_none(fmt.get('itag'))
1967 quality = fmt.get('quality')
1968 if itag and quality:
1969 itag_qualities[itag] = quality
1970 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1971 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1972 # number of fragment that would subsequently requested with (`&sq=N`)
1973 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1974 continue
1975
545cc85d 1976 fmt_url = fmt.get('url')
1977 if not fmt_url:
1978 sc = compat_parse_qs(fmt.get('signatureCipher'))
1979 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1980 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1981 if not (sc and fmt_url and encrypted_sig):
1982 continue
1983 if not player_url:
1984 if not webpage:
1985 continue
1986 player_url = self._search_regex(
1987 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1988 webpage, 'player URL', fatal=False)
1989 if not player_url:
201e9eaa 1990 continue
545cc85d 1991 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
1992 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
1993 fmt_url += '&' + sp + '=' + signature
1994
545cc85d 1995 if itag:
1996 itags.append(itag)
cc2db878 1997 tbr = float_or_none(
1998 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 1999 dct = {
2000 'asr': int_or_none(fmt.get('audioSampleRate')),
2001 'filesize': int_or_none(fmt.get('contentLength')),
2002 'format_id': itag,
2003 'format_note': fmt.get('qualityLabel') or quality,
2004 'fps': int_or_none(fmt.get('fps')),
2005 'height': int_or_none(fmt.get('height')),
dca3ff4a 2006 'quality': q(quality),
cc2db878 2007 'tbr': tbr,
545cc85d 2008 'url': fmt_url,
2009 'width': fmt.get('width'),
2010 }
2011 mimetype = fmt.get('mimeType')
2012 if mimetype:
2013 mobj = re.match(
2014 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2015 if mobj:
2016 dct['ext'] = mimetype2ext(mobj.group(1))
2017 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2018 no_audio = dct.get('acodec') == 'none'
2019 no_video = dct.get('vcodec') == 'none'
2020 if no_audio:
2021 dct['vbr'] = tbr
2022 if no_video:
2023 dct['abr'] = tbr
2024 if no_audio or no_video:
545cc85d 2025 dct['downloader_options'] = {
2026 # Youtube throttles chunks >~10M
2027 'http_chunk_size': 10485760,
bf1317d2 2028 }
7c60c33e 2029 if dct.get('ext'):
2030 dct['container'] = dct['ext'] + '_dash'
545cc85d 2031 formats.append(dct)
2032
2033 hls_manifest_url = streaming_data.get('hlsManifestUrl')
2034 if hls_manifest_url:
2035 for f in self._extract_m3u8_formats(
2036 hls_manifest_url, video_id, 'mp4', fatal=False):
2037 itag = self._search_regex(
2038 r'/itag/(\d+)', f['url'], 'itag', default=None)
2039 if itag:
2040 f['format_id'] = itag
2041 formats.append(f)
2042
1418a043 2043 if self._downloader.params.get('youtube_include_dash_manifest', True):
545cc85d 2044 dash_manifest_url = streaming_data.get('dashManifestUrl')
2045 if dash_manifest_url:
545cc85d 2046 for f in self._extract_mpd_formats(
2047 dash_manifest_url, video_id, fatal=False):
cc2db878 2048 itag = f['format_id']
2049 if itag in itags:
2050 continue
dca3ff4a 2051 if itag in itag_qualities:
2052 # Not actually usefull since the sorting is already done with "quality,res,fps,codec"
2053 # but kept to maintain feature parity (and code similarity) with youtube-dl
2054 # Remove if this causes any issues with sorting in future
2055 f['quality'] = q(itag_qualities[itag])
545cc85d 2056 filesize = int_or_none(self._search_regex(
2057 r'/clen/(\d+)', f.get('fragment_base_url')
2058 or f['url'], 'file size', default=None))
2059 if filesize:
2060 f['filesize'] = filesize
cc2db878 2061 formats.append(f)
bf1317d2 2062
545cc85d 2063 if not formats:
63ad4d43 2064 if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2065 self.raise_no_formats(
545cc85d 2066 'This video is DRM protected.', expected=True)
2067 pemr = try_get(
2068 playability_status,
2069 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2070 dict) or {}
2071 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2072 subreason = pemr.get('subreason')
2073 if subreason:
2074 subreason = clean_html(get_text(subreason))
2075 if subreason == 'The uploader has not made this video available in your country.':
2076 countries = microformat.get('availableCountries')
2077 if not countries:
2078 regions_allowed = search_meta('regionsAllowed')
2079 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2080 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2081 reason += '\n' + subreason
2082 if reason:
b7da73eb 2083 self.raise_no_formats(reason, expected=True)
bf1317d2 2084
545cc85d 2085 self._sort_formats(formats)
bf1317d2 2086
545cc85d 2087 keywords = video_details.get('keywords') or []
2088 if not keywords and webpage:
2089 keywords = [
2090 unescapeHTML(m.group('content'))
2091 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2092 for keyword in keywords:
2093 if keyword.startswith('yt:stretch='):
201c1459 2094 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2095 if mobj:
2096 # NB: float is intentional for forcing float division
2097 w, h = (float(v) for v in mobj.groups())
2098 if w > 0 and h > 0:
2099 ratio = w / h
2100 for f in formats:
2101 if f.get('vcodec') != 'none':
2102 f['stretched_ratio'] = ratio
2103 break
6449cd80 2104
545cc85d 2105 thumbnails = []
2106 for container in (video_details, microformat):
2107 for thumbnail in (try_get(
2108 container,
2109 lambda x: x['thumbnail']['thumbnails'], list) or []):
2110 thumbnail_url = thumbnail.get('url')
2111 if not thumbnail_url:
bf1317d2 2112 continue
1988fab7 2113 # Sometimes youtube gives a wrong thumbnail URL. See:
2114 # https://github.com/yt-dlp/yt-dlp/issues/233
2115 # https://github.com/ytdl-org/youtube-dl/issues/28023
2116 if 'maxresdefault' in thumbnail_url:
2117 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2118 thumbnails.append({
2119 'height': int_or_none(thumbnail.get('height')),
2120 'url': thumbnail_url,
2121 'width': int_or_none(thumbnail.get('width')),
2122 })
2123 if thumbnails:
2124 break
a6211d23 2125 else:
545cc85d 2126 thumbnail = search_meta(['og:image', 'twitter:image'])
2127 if thumbnail:
2128 thumbnails = [{'url': thumbnail}]
2129
2130 category = microformat.get('category') or search_meta('genre')
2131 channel_id = video_details.get('channelId') \
2132 or microformat.get('externalChannelId') \
2133 or search_meta('channelId')
2134 duration = int_or_none(
2135 video_details.get('lengthSeconds')
2136 or microformat.get('lengthSeconds')) \
2137 or parse_duration(search_meta('duration'))
2138 is_live = video_details.get('isLive')
2139 owner_profile_url = microformat.get('ownerProfileUrl')
2140
2141 info = {
2142 'id': video_id,
2143 'title': self._live_title(video_title) if is_live else video_title,
2144 'formats': formats,
2145 'thumbnails': thumbnails,
2146 'description': video_description,
2147 'upload_date': unified_strdate(
2148 microformat.get('uploadDate')
2149 or search_meta('uploadDate')),
2150 'uploader': video_details['author'],
2151 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2152 'uploader_url': owner_profile_url,
2153 'channel_id': channel_id,
2154 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2155 'duration': duration,
2156 'view_count': int_or_none(
2157 video_details.get('viewCount')
2158 or microformat.get('viewCount')
2159 or search_meta('interactionCount')),
2160 'average_rating': float_or_none(video_details.get('averageRating')),
2161 'age_limit': 18 if (
2162 microformat.get('isFamilySafe') is False
2163 or search_meta('isFamilyFriendly') == 'false'
2164 or search_meta('og:restrictions:age') == '18+') else 0,
2165 'webpage_url': webpage_url,
2166 'categories': [category] if category else None,
2167 'tags': keywords,
2168 'is_live': is_live,
2169 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2170 'was_live': video_details.get('isLiveContent'),
545cc85d 2171 }
b477fc13 2172
545cc85d 2173 pctr = try_get(
2174 player_response,
2175 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2176 subtitles = {}
2177 if pctr:
2178 def process_language(container, base_url, lang_code, query):
2179 lang_subs = []
2180 for fmt in self._SUBTITLE_FORMATS:
2181 query.update({
2182 'fmt': fmt,
2183 })
2184 lang_subs.append({
2185 'ext': fmt,
2186 'url': update_url_query(base_url, query),
2187 })
2188 container[lang_code] = lang_subs
7e72694b 2189
545cc85d 2190 for caption_track in (pctr.get('captionTracks') or []):
2191 base_url = caption_track.get('baseUrl')
2192 if not base_url:
2193 continue
2194 if caption_track.get('kind') != 'asr':
2195 lang_code = caption_track.get('languageCode')
2196 if not lang_code:
2197 continue
2198 process_language(
2199 subtitles, base_url, lang_code, {})
2200 continue
2201 automatic_captions = {}
2202 for translation_language in (pctr.get('translationLanguages') or []):
2203 translation_language_code = translation_language.get('languageCode')
2204 if not translation_language_code:
2205 continue
2206 process_language(
2207 automatic_captions, base_url, translation_language_code,
2208 {'tlang': translation_language_code})
2209 info['automatic_captions'] = automatic_captions
2210 info['subtitles'] = subtitles
7e72694b 2211
545cc85d 2212 parsed_url = compat_urllib_parse_urlparse(url)
2213 for component in [parsed_url.fragment, parsed_url.query]:
2214 query = compat_parse_qs(component)
2215 for k, v in query.items():
2216 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2217 d_k += '_time'
2218 if d_k not in info and k in s_ks:
2219 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2220
2221 # Youtube Music Auto-generated description
822b9d9c 2222 if video_description:
38d70284 2223 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2224 if mobj:
822b9d9c
RA
2225 release_year = mobj.group('release_year')
2226 release_date = mobj.group('release_date')
2227 if release_date:
2228 release_date = release_date.replace('-', '')
2229 if not release_year:
545cc85d 2230 release_year = release_date[:4]
2231 info.update({
2232 'album': mobj.group('album'.strip()),
2233 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2234 'track': mobj.group('track').strip(),
2235 'release_date': release_date,
cc2db878 2236 'release_year': int_or_none(release_year),
545cc85d 2237 })
7e72694b 2238
545cc85d 2239 initial_data = None
2240 if webpage:
2241 initial_data = self._extract_yt_initial_variable(
2242 webpage, self._YT_INITIAL_DATA_RE, video_id,
2243 'yt initial data')
2244 if not initial_data:
2245 initial_data = self._call_api(
f4f751af 2246 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2247
2248 if not is_live:
2249 try:
2250 # This will error if there is no livechat
2251 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2252 info['subtitles']['live_chat'] = [{
394dcd44 2253 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2254 'video_id': video_id,
2255 'ext': 'json',
2256 'protocol': 'youtube_live_chat_replay',
2257 }]
2258 except (KeyError, IndexError, TypeError):
2259 pass
2260
2261 if initial_data:
2262 chapters = self._extract_chapters_from_json(
2263 initial_data, video_id, duration)
2264 if not chapters:
2265 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2266 contents = try_get(
2267 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2268 list)
2269 if not contents:
2270 continue
2271
2272 def chapter_time(mmlir):
2273 return parse_duration(
2274 get_text(mmlir.get('timeDescription')))
2275
2276 chapters = []
2277 for next_num, content in enumerate(contents, start=1):
2278 mmlir = content.get('macroMarkersListItemRenderer') or {}
2279 start_time = chapter_time(mmlir)
2280 end_time = chapter_time(try_get(
2281 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2282 if next_num < len(contents) else duration
2283 if start_time is None or end_time is None:
2284 continue
2285 chapters.append({
2286 'start_time': start_time,
2287 'end_time': end_time,
2288 'title': get_text(mmlir.get('title')),
2289 })
2290 if chapters:
2291 break
2292 if chapters:
2293 info['chapters'] = chapters
2294
2295 contents = try_get(
2296 initial_data,
2297 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2298 list) or []
2299 for content in contents:
2300 vpir = content.get('videoPrimaryInfoRenderer')
2301 if vpir:
2302 stl = vpir.get('superTitleLink')
2303 if stl:
2304 stl = get_text(stl)
2305 if try_get(
2306 vpir,
2307 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2308 info['location'] = stl
2309 else:
2310 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2311 if mobj:
2312 info.update({
2313 'series': mobj.group(1),
2314 'season_number': int(mobj.group(2)),
2315 'episode_number': int(mobj.group(3)),
2316 })
2317 for tlb in (try_get(
2318 vpir,
2319 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2320 list) or []):
2321 tbr = tlb.get('toggleButtonRenderer') or {}
2322 for getter, regex in [(
2323 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2324 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2325 lambda x: x['accessibility'],
2326 lambda x: x['accessibilityData']['accessibilityData'],
2327 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2328 label = (try_get(tbr, getter, dict) or {}).get('label')
2329 if label:
2330 mobj = re.match(regex, label)
2331 if mobj:
2332 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2333 break
2334 sbr_tooltip = try_get(
2335 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2336 if sbr_tooltip:
2337 like_count, dislike_count = sbr_tooltip.split(' / ')
2338 info.update({
2339 'like_count': str_to_int(like_count),
2340 'dislike_count': str_to_int(dislike_count),
2341 })
2342 vsir = content.get('videoSecondaryInfoRenderer')
2343 if vsir:
2344 info['channel'] = get_text(try_get(
2345 vsir,
2346 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2347 dict))
545cc85d 2348 rows = try_get(
2349 vsir,
2350 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2351 list) or []
2352 multiple_songs = False
2353 for row in rows:
2354 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2355 multiple_songs = True
2356 break
2357 for row in rows:
2358 mrr = row.get('metadataRowRenderer') or {}
2359 mrr_title = mrr.get('title')
2360 if not mrr_title:
2361 continue
2362 mrr_title = get_text(mrr['title'])
2363 mrr_contents_text = get_text(mrr['contents'][0])
2364 if mrr_title == 'License':
2365 info['license'] = mrr_contents_text
2366 elif not multiple_songs:
2367 if mrr_title == 'Album':
2368 info['album'] = mrr_contents_text
2369 elif mrr_title == 'Artist':
2370 info['artist'] = mrr_contents_text
2371 elif mrr_title == 'Song':
2372 info['track'] = mrr_contents_text
2373
2374 fallbacks = {
2375 'channel': 'uploader',
2376 'channel_id': 'uploader_id',
2377 'channel_url': 'uploader_url',
2378 }
2379 for to, frm in fallbacks.items():
2380 if not info.get(to):
2381 info[to] = info.get(frm)
2382
2383 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2384 v = info.get(s_k)
2385 if v:
2386 info[d_k] = v
b84071c0 2387
c224251a
M
2388 is_private = bool_or_none(video_details.get('isPrivate'))
2389 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2390 is_membersonly = None
b28f8d24 2391 is_premium = None
c224251a
M
2392 if initial_data and is_private is not None:
2393 is_membersonly = False
b28f8d24 2394 is_premium = False
c224251a
M
2395 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2396 for content in contents or []:
2397 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2398 for badge in badges or []:
2399 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2400 if label.lower() == 'members only':
2401 is_membersonly = True
2402 break
b28f8d24
M
2403 elif label.lower() == 'premium':
2404 is_premium = True
2405 break
2406 if is_membersonly or is_premium:
c224251a
M
2407 break
2408
2409 # TODO: Add this for playlists
2410 info['availability'] = self._availability(
2411 is_private=is_private,
b28f8d24 2412 needs_premium=is_premium,
c224251a
M
2413 needs_subscription=is_membersonly,
2414 needs_auth=info['age_limit'] >= 18,
2415 is_unlisted=None if is_private is None else is_unlisted)
2416
06167fbb 2417 # get xsrf for annotations or comments
2418 get_annotations = self._downloader.params.get('writeannotations', False)
2419 get_comments = self._downloader.params.get('getcomments', False)
2420 if get_annotations or get_comments:
29f7c58a 2421 xsrf_token = None
545cc85d 2422 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2423 if ytcfg:
2424 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2425 if not xsrf_token:
2426 xsrf_token = self._search_regex(
2427 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2428 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2429
2430 # annotations
06167fbb 2431 if get_annotations:
64b6a4e9
RA
2432 invideo_url = try_get(
2433 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2434 if xsrf_token and invideo_url:
29f7c58a 2435 xsrf_field_name = None
2436 if ytcfg:
2437 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2438 if not xsrf_field_name:
2439 xsrf_field_name = self._search_regex(
2440 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2441 webpage, 'xsrf field name',
29f7c58a 2442 group='xsrf_field_name', default='session_token')
8a784c74 2443 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2444 self._proto_relative_url(invideo_url),
2445 video_id, note='Downloading annotations',
2446 errnote='Unable to download video annotations', fatal=False,
2447 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2448
277d6ff5 2449 if get_comments:
a1c5d2ca 2450 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2451
545cc85d 2452 self.mark_watched(video_id, player_response)
d77ab8e2 2453
545cc85d 2454 return info
c5e8d7af 2455
5f6a1245 2456
8bdd16b4 2457class YoutubeTabIE(YoutubeBaseInfoExtractor):
# Human-readable description of this extractor.
IE_DESC = 'YouTube.com tab'
# Verbose-mode (?x) pattern for the URLs this extractor accepts:
#   * scheme http/https with an optional single subdomain label,
#   * host youtube.com, youtubekids.com or invidio.us,
#   * then one of
#       - a channel/, c/ or user/ prefix,
#       - the ``not_channel`` group: feed/, hashtag/, or a playlist?/watch?
#         query containing ``list=``,
#       - nothing at all ("direct" URLs), provided the next path component
#         is not one of YoutubeBaseInfoExtractor._RESERVED_NAMES,
#   * and finally the ``id`` group, which runs up to the next /, ?, # or &.
_VALID_URL = r'''(?x)
                https?://
                    (?:\w+\.)?
                    (?:
                        youtube(?:kids)?\.com|
                        invidio\.us
                    )/
                    (?:
                        (?:channel|c|user)/|
                        (?P<not_channel>
                            feed/|hashtag/|
                            (?:playlist|watch)\?.*?\blist=
                        )|
                        (?!(?:%s)\b)  # Direct URLs
                    )
                    (?P<id>[^/?\#&]+)
                ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
# Name under which this extractor is registered.
IE_NAME = 'youtube:tab'
2477
81127aa5 2478 _TESTS = [{
8bdd16b4 2479 # playlists, multipage
2480 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2481 'playlist_mincount': 94,
2482 'info_dict': {
2483 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2484 'title': 'Игорь Клейнер - Playlists',
2485 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2486 'uploader': 'Игорь Клейнер',
2487 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2488 },
2489 }, {
2490 # playlists, multipage, different order
2491 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2492 'playlist_mincount': 94,
2493 'info_dict': {
2494 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2495 'title': 'Игорь Клейнер - Playlists',
2496 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2497 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2498 'uploader': 'Игорь Клейнер',
8bdd16b4 2499 },
201c1459 2500 }, {
2501 # playlists, series
2502 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2503 'playlist_mincount': 5,
2504 'info_dict': {
2505 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2506 'title': '3Blue1Brown - Playlists',
2507 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2508 },
8bdd16b4 2509 }, {
2510 # playlists, singlepage
2511 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2512 'playlist_mincount': 4,
2513 'info_dict': {
2514 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2515 'title': 'ThirstForScience - Playlists',
2516 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2517 'uploader': 'ThirstForScience',
2518 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2519 }
2520 }, {
2521 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2522 'only_matching': True,
2523 }, {
2524 # basic, single video playlist
0e30a7b9 2525 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2526 'info_dict': {
0e30a7b9 2527 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2528 'uploader': 'Sergey M.',
2529 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2530 'title': 'youtube-dl public playlist',
81127aa5 2531 },
0e30a7b9 2532 'playlist_count': 1,
9291475f 2533 }, {
8bdd16b4 2534 # empty playlist
0e30a7b9 2535 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2536 'info_dict': {
0e30a7b9 2537 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2538 'uploader': 'Sergey M.',
2539 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2540 'title': 'youtube-dl empty playlist',
9291475f
PH
2541 },
2542 'playlist_count': 0,
2543 }, {
8bdd16b4 2544 # Home tab
2545 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2546 'info_dict': {
8bdd16b4 2547 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2548 'title': 'lex will - Home',
2549 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2550 'uploader': 'lex will',
2551 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2552 },
8bdd16b4 2553 'playlist_mincount': 2,
9291475f 2554 }, {
8bdd16b4 2555 # Videos tab
2556 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2557 'info_dict': {
8bdd16b4 2558 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2559 'title': 'lex will - Videos',
2560 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2561 'uploader': 'lex will',
2562 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2563 },
8bdd16b4 2564 'playlist_mincount': 975,
9291475f 2565 }, {
8bdd16b4 2566 # Videos tab, sorted by popular
2567 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2568 'info_dict': {
8bdd16b4 2569 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2570 'title': 'lex will - Videos',
2571 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2572 'uploader': 'lex will',
2573 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2574 },
8bdd16b4 2575 'playlist_mincount': 199,
9291475f 2576 }, {
8bdd16b4 2577 # Playlists tab
2578 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2579 'info_dict': {
8bdd16b4 2580 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2581 'title': 'lex will - Playlists',
2582 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2583 'uploader': 'lex will',
2584 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2585 },
8bdd16b4 2586 'playlist_mincount': 17,
ac7553d0 2587 }, {
8bdd16b4 2588 # Community tab
2589 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2590 'info_dict': {
8bdd16b4 2591 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2592 'title': 'lex will - Community',
2593 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2594 'uploader': 'lex will',
2595 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2596 },
2597 'playlist_mincount': 18,
87dadd45 2598 }, {
8bdd16b4 2599 # Channels tab
2600 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2601 'info_dict': {
8bdd16b4 2602 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2603 'title': 'lex will - Channels',
2604 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2605 'uploader': 'lex will',
2606 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2607 },
deaec5af 2608 'playlist_mincount': 12,
6b08cdf6 2609 }, {
a0566bbf 2610 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2611 'only_matching': True,
2612 }, {
a0566bbf 2613 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2614 'only_matching': True,
2615 }, {
a0566bbf 2616 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2617 'only_matching': True,
2618 }, {
2619 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2620 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2621 'info_dict': {
2622 'title': '29C3: Not my department',
2623 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2624 'uploader': 'Christiaan008',
2625 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2626 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2627 },
2628 'playlist_count': 96,
2629 }, {
2630 'note': 'Large playlist',
2631 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2632 'info_dict': {
8bdd16b4 2633 'title': 'Uploads from Cauchemar',
2634 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2635 'uploader': 'Cauchemar',
2636 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2637 },
8bdd16b4 2638 'playlist_mincount': 1123,
2639 }, {
2640 # even larger playlist, 8832 videos
2641 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2642 'only_matching': True,
4b7df0d3
JMF
2643 }, {
2644 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2645 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2646 'info_dict': {
acf757f4
PH
2647 'title': 'Uploads from Interstellar Movie',
2648 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2649 'uploader': 'Interstellar Movie',
8bdd16b4 2650 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2651 },
481cc733 2652 'playlist_mincount': 21,
358de58c 2653 }, {
2654 'note': 'Playlist with "show unavailable videos" button',
2655 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2656 'info_dict': {
2657 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2658 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2659 'uploader': 'Phim Siêu Nhân Nhật Bản',
2660 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2661 },
2662 'playlist_mincount': 1400,
2663 'expected_warnings': [
2664 'YouTube said: INFO - Unavailable videos are hidden',
2665 ]
5d342002 2666 }, {
2667 'note': 'Playlist with unavailable videos in a later page',
2668 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2669 'info_dict': {
2670 'title': 'Uploads from BlankTV',
2671 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2672 'uploader': 'BlankTV',
2673 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2674 },
2675 'playlist_mincount': 20000,
8bdd16b4 2676 }, {
2677 # https://github.com/ytdl-org/youtube-dl/issues/21844
2678 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2679 'info_dict': {
2680 'title': 'Data Analysis with Dr Mike Pound',
2681 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2682 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2683 'uploader': 'Computerphile',
deaec5af 2684 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2685 },
2686 'playlist_mincount': 11,
2687 }, {
a0566bbf 2688 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2689 'only_matching': True,
dacb3a86
S
2690 }, {
2691 # Playlist URL that does not actually serve a playlist
2692 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2693 'info_dict': {
2694 'id': 'FqZTN594JQw',
2695 'ext': 'webm',
2696 'title': "Smiley's People 01 detective, Adventure Series, Action",
2697 'uploader': 'STREEM',
2698 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2699 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2700 'upload_date': '20150526',
2701 'license': 'Standard YouTube License',
2702 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2703 'categories': ['People & Blogs'],
2704 'tags': list,
dbdaaa23 2705 'view_count': int,
dacb3a86
S
2706 'like_count': int,
2707 'dislike_count': int,
2708 },
2709 'params': {
2710 'skip_download': True,
2711 },
13a75688 2712 'skip': 'This video is not available.',
dacb3a86 2713 'add_ie': [YoutubeIE.ie_key()],
481cc733 2714 }, {
8bdd16b4 2715 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2716 'only_matching': True,
66b48727 2717 }, {
8bdd16b4 2718 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2719 'only_matching': True,
a0566bbf 2720 }, {
2721 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2722 'info_dict': {
2723 'id': '9Auq9mYxFEE',
2724 'ext': 'mp4',
deaec5af 2725 'title': compat_str,
a0566bbf 2726 'uploader': 'Sky News',
2727 'uploader_id': 'skynews',
2728 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2729 'upload_date': '20191102',
deaec5af 2730 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2731 'categories': ['News & Politics'],
2732 'tags': list,
2733 'like_count': int,
2734 'dislike_count': int,
2735 },
2736 'params': {
2737 'skip_download': True,
2738 },
2739 }, {
2740 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2741 'info_dict': {
2742 'id': 'a48o2S1cPoo',
2743 'ext': 'mp4',
2744 'title': 'The Young Turks - Live Main Show',
2745 'uploader': 'The Young Turks',
2746 'uploader_id': 'TheYoungTurks',
2747 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2748 'upload_date': '20150715',
2749 'license': 'Standard YouTube License',
2750 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2751 'categories': ['News & Politics'],
2752 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2753 'like_count': int,
2754 'dislike_count': int,
2755 },
2756 'params': {
2757 'skip_download': True,
2758 },
2759 'only_matching': True,
2760 }, {
2761 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2762 'only_matching': True,
2763 }, {
2764 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2765 'only_matching': True,
3d3dddc9 2766 }, {
2767 'url': 'https://www.youtube.com/feed/trending',
2768 'only_matching': True,
2769 }, {
2770 # needs auth
2771 'url': 'https://www.youtube.com/feed/library',
2772 'only_matching': True,
2773 }, {
2774 # needs auth
2775 'url': 'https://www.youtube.com/feed/history',
2776 'only_matching': True,
2777 }, {
2778 # needs auth
2779 'url': 'https://www.youtube.com/feed/subscriptions',
2780 'only_matching': True,
2781 }, {
2782 # needs auth
2783 'url': 'https://www.youtube.com/feed/watch_later',
2784 'only_matching': True,
2785 }, {
2786 # no longer available?
2787 'url': 'https://www.youtube.com/feed/recommended',
2788 'only_matching': True,
29f7c58a 2789 }, {
2790 # inline playlist with not always working continuations
2791 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2792 'only_matching': True,
2793 }, {
2794 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2795 'only_matching': True,
2796 }, {
2797 'url': 'https://www.youtube.com/course',
2798 'only_matching': True,
2799 }, {
2800 'url': 'https://www.youtube.com/zsecurity',
2801 'only_matching': True,
2802 }, {
2803 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2804 'only_matching': True,
2805 }, {
2806 'url': 'https://www.youtube.com/TheYoungTurks/live',
2807 'only_matching': True,
39ed931e 2808 }, {
2809 'url': 'https://www.youtube.com/hashtag/cctv9',
2810 'info_dict': {
2811 'id': 'cctv9',
2812 'title': '#cctv9',
2813 },
2814 'playlist_mincount': 350,
201c1459 2815 }, {
2816 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2817 'only_matching': True,
29f7c58a 2818 }]
2819
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL accepted by YoutubeIE is rejected here; every other URL is
    decided by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2824
2825 def _extract_channel_id(self, webpage):
2826 channel_id = self._html_search_meta(
2827 'channelId', webpage, 'channel id', default=None)
2828 if channel_id:
2829 return channel_id
2830 channel_url = self._html_search_meta(
2831 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2832 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2833 'twitter:app:url:googleplay'), webpage, 'channel url')
2834 return self._search_regex(
2835 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2836 channel_url, 'channel id')
15f6397c 2837
8bdd16b4 2838 @staticmethod
cd7c66cf 2839 def _extract_basic_item_renderer(item):
2840 # Modified from _extract_grid_item_renderer
201c1459 2841 known_basic_renderers = (
2842 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 2843 )
2844 for key, renderer in item.items():
201c1459 2845 if not isinstance(renderer, dict):
cd7c66cf 2846 continue
201c1459 2847 elif key in known_basic_renderers:
2848 return renderer
2849 elif key.startswith('grid') and key.endswith('Renderer'):
2850 return renderer
8bdd16b4 2851
def _grid_entries(self, grid_renderer):
    """Yield one result per recognisable item of *grid_renderer*.

    Every element of ``grid_renderer['items']`` is unwrapped with
    ``_extract_basic_item_renderer`` and classified by the first
    identifier it carries:

    * ``playlistId`` -> url result for the playlist (YoutubeTabIE),
    * ``videoId``    -> whatever ``self._extract_video`` returns,
    * ``channelId``  -> url result for the channel (YoutubeTabIE).

    Items with none of these fall back to their navigation endpoint URL,
    which is handed to the first of YoutubeTabIE, YoutubePlaylistIE and
    YoutubeIE that reports the URL as suitable.

    A renderer that has no list under ``items`` yields nothing.
    """
    # Treat a missing or non-list 'items' as empty instead of raising.
    items = try_get(grid_renderer, lambda x: x['items'], list) or []
    for item in items:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        # The title may be given as text runs or as a plain string.
        title = try_get(
            renderer, (lambda x: x['title']['runs'][0]['text'],
                       lambda x: x['title']['simpleText']), compat_str)
        # playlist
        playlist_id = renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
            continue
        # video
        video_id = renderer.get('videoId')
        if video_id:
            yield self._extract_video(renderer)
            continue
        # channel
        channel_id = renderer.get('channelId')
        if channel_id:
            # Reuse the title found above: it already covers the
            # 'simpleText' form and additionally keeps a runs-based title.
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
            continue
        # generic endpoint URL support
        ep_url = urljoin('https://www.youtube.com/', try_get(
            renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str))
        if ep_url:
            for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if ie.suitable(ep_url):
                    yield self.url_result(
                        ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                    break
8bdd16b4 2894
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the entries embedded directly in a shelf's ``content``.

    Only ``gridRenderer`` and ``expandedShelfContentsRenderer`` content is
    extracted (through ``_grid_entries``); nothing is yielded for any
    other kind of content.
    """
    shelf_content = shelf_renderer.get('content')
    if not isinstance(shelf_content, dict):
        return
    grid = (
        shelf_content.get('gridRenderer')
        or shelf_content.get('expandedShelfContentsRenderer'))
    # TODO: add support for nested playlists so each shelf is processed
    # as separate playlist
    # TODO: this includes only first N items
    for entry in (self._grid_entries(grid) if grid else ()):
        yield entry
    # TODO: 'horizontalListRenderer' content is not extracted yet
8bdd16b4 2910
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield results for one shelf.

    When the shelf exposes an endpoint URL, a url result for it (titled
    with the shelf's first title run) is yielded first.  With
    *skip_channels* set, a shelf whose URL contains ``/channels?`` yields
    nothing at all.  Entries embedded in the shelf content are yielded
    afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        points_to_channels = '/channels?' in shelf_url
        if skip_channels and points_to_channels:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for embedded in self._shelf_entries_from_content(shelf_renderer):
        yield embedded
c5e8d7af 2928
def _playlist_entries(self, video_list_renderer):
    """Yield an extracted video for each playlist item that has a video ID.

    Items are read from ``video_list_renderer['contents']``; the renderer
    is taken from ``playlistVideoRenderer`` or, failing that,
    ``playlistPanelVideoRenderer``.  Anything else is skipped.
    """
    for item in video_list_renderer['contents']:
        if isinstance(item, dict):
            video_renderer = (
                item.get('playlistVideoRenderer')
                or item.get('playlistPanelVideoRenderer'))
            if isinstance(video_renderer, dict) and video_renderer.get('videoId'):
                yield self._extract_video(video_renderer)
07aeced6 2940
def _rich_entries(self, rich_grid_renderer):
    """Yield the video found under ``content.videoRenderer``, if it has an ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
2948
def _video_entry(self, video_renderer):
    """Return the extracted video for *video_renderer*, or None without a video ID."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)
dacb3a86 2953
def _post_thread_entries(self, post_thread_renderer):
    """Yield everything playable that a community post refers to.

    In order: the attached video, the attached playlist, and every link
    in the post text that YoutubeIE accepts (except a link to the video
    that is already attached).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        attached_entry = self._extract_video(attached_video)
        if attached_entry:
            yield attached_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video has already been yielded above.
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 2989
def _post_thread_continuation_entries(self, post_thread_continuation):
    """Yield the entries of every post thread in a continuation payload.

    *post_thread_continuation* is expected to hold a list under
    ``contents``; each element's ``backstagePostThreadRenderer`` is passed
    to ``_post_thread_entries``.  Nothing is yielded when ``contents`` is
    not a list, and malformed elements are skipped.
    """
    contents = post_thread_continuation.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        # Guard against non-dict elements, as the sibling iterators do,
        # so that one malformed element cannot abort the whole page.
        if not isinstance(content, dict):
            continue
        renderer = content.get('backstagePostThreadRenderer')
        if not isinstance(renderer, dict):
            continue
        for entry in self._post_thread_entries(renderer):
            yield entry
07aeced6 3000
39ed931e 3001 r''' # unused
3002 def _rich_grid_entries(self, contents):
3003 for content in contents:
3004 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3005 if video_renderer:
3006 entry = self._video_entry(video_renderer)
3007 if entry:
3008 yield entry
3009 '''
3010
29f7c58a 3011 @staticmethod
3012 def _build_continuation_query(continuation, ctp=None):
3013 query = {
3014 'ctoken': continuation,
3015 'continuation': continuation,
3016 }
3017 if ctp:
3018 query['itct'] = ctp
3019 return query
3020
@staticmethod
def _extract_next_continuation_data(renderer):
    """Build a continuation query from ``continuations[0].nextContinuationData``.

    Returns None when that structure or its ``continuation`` token is
    missing.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = data.get('continuation')
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, data.get('clickTrackingParams'))
c5e8d7af 3032
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None if it has none.

    ``nextContinuationData`` is tried first.  Failing that, the elements
    of ``contents`` and then ``items`` are scanned for the first
    ``continuationItemRenderer`` whose endpoint carries a command token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    candidates = []
    for list_key in ('contents', 'items'):
        candidates.extend(try_get(renderer, lambda x: x[list_key], list) or [])

    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if endpoint:
            token = try_get(
                endpoint, lambda x: x['continuationCommand']['token'], compat_str)
            if token:
                return YoutubeTabIE._build_continuation_query(
                    token, endpoint.get('clickTrackingParams'))
    return None
448830ce 3055
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of *tab*, following continuations page by page.

    The first batch comes from the tab's ``sectionListRenderer`` or
    ``richGridRenderer``.  After that, continuation pages are requested
    through ``self._extract_response`` until no continuation is left, the
    response is empty, or the response contains nothing recognisable.

    *item_id* is only used to label the requested pages.
    *identity_token*, *account_syncid* and *ytcfg* are forwarded to the
    header/response helpers.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Besides yielding entries, this records the continuation it finds
        # in continuation_list[0] for the enclosing loop to pick up.
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each element is used.
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # 'itct' is only present when click tracking params were found
        # (see _build_continuation_query), so it must not be indexed
        # unconditionally.
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep using the previous visitor data if the response has none.
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response shape: a 'continuationContents' mapping.
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset so that extract_entries (if used) reports a fresh value.
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: an 'appendContinuationItemsAction' whose
        # first item decides which extractor handles the whole item list.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the raw item list under the key the extractor expects.
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3175
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` dict of the first tab marked as selected.

    A tab counts as selected when ``tab['tabRenderer']['selected']`` is a
    truthy bool. Raises ExtractorError when no tab in ``tabs`` qualifies.
    """
    chosen = next(
        (candidate for candidate in tabs
         if try_get(candidate, lambda x: x['tabRenderer']['selected'], bool)),
        None)
    if chosen is None:
        raise ExtractorError('Unable to find selected tab')
    return chosen['tabRenderer']
b82f815f 3183
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar of ``data``.

    Looks at every ``playlistSidebarSecondaryInfoRenderer`` item of the
    sidebar and reads the first title run of its video owner. Returns a
    dict that may hold ``uploader``, ``uploader_id`` and ``uploader_url``;
    keys whose value is None are left out, so the result can be empty.
    """
    found = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for entry in sidebar_items:
        secondary = (
            entry.get('playlistSidebarSecondaryInfoRenderer')
            if isinstance(entry, dict) else None)
        if not isinstance(secondary, dict):
            continue
        owner_run = try_get(
            secondary, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner_run:
            continue
        # A later sidebar item with an owner overrides an earlier one
        found.update({
            'uploader': owner_run.get('text'),
            'uploader_id': try_get(
                owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
            'uploader_url': urljoin(
                'https://www.youtube.com/',
                try_get(
                    owner_run,
                    lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'],
                    compat_str)),
        })
    return dict((key, value) for key, value in found.items() if value is not None)
8bdd16b4 3206
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a page that is organised in tabs.

    Metadata is read from ``channelMetadataRenderer`` when present and
    from ``playlistMetadataRenderer`` otherwise. The entries come from the
    selected tab. When the page carries no channel id, the uploader fields
    are taken from the playlist sidebar instead.
    """
    # Resolved first so that a page without a selected tab fails early
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name, channel_url, channel_id = (
            renderer.get(field) for field in ('title', 'channelUrl', 'externalId'))
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags, raw_thumbnails = [], []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        raw_thumbnails = try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
        if not raw_thumbnails:
            # No avatar: fall back to the thumbnail of the sidebar's first item
            raw_thumbnails = try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list) or []

    thumbnails = []
    for candidate in raw_thumbnails:
        thumbnail_url = url_or_none(candidate.get('url')) if isinstance(candidate, dict) else None
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(candidate.get('width')),
                'height': int_or_none(candidate.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # Mirror the (possibly updated) uploader fields into the channel fields
    for suffix in ('', '_id', '_url'):
        metadata['channel' + suffix] = metadata['uploader' + suffix]

    entries = self._entries(
        selected_tab, playlist_id,
        self._extract_identity_token(webpage, item_id),
        self._extract_account_syncid(data),
        self._extract_ytcfg(item_id, webpage))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3278
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages on demand.

    playlist -- playlist renderer dict of the current page
    playlist_id -- id sent as ``playlistId`` with every continuation query
    data -- initial page data, used for the account sync id
    webpage -- page HTML, used for ytcfg and the identity token

    Each page is expected to contain the last video of the previous page;
    only the videos after it are yielded. Extraction stops when a page has
    no videos, has nothing new, or starts over with the first video.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        if not playlist:
            # A continuation response without a playlist renderer ends the mix
            # NOTE(review): _playlist_entries is not visible here; this guard
            # avoids handing it None - confirm it has no None handling of its own
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last panel entry is not guaranteed to carry a watch endpoint;
        # fall back to an empty dict so the defaults below apply
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 3317
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a watch page that carries a playlist panel.

    When the panel links to a playlist URL different from ``url``, the
    work is delegated to YoutubeTabIE for that URL. Otherwise the panel is
    treated as a mix and extracted page by page.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    linked_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, linked_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3335
f3eaa8dd
M
def _extract_alerts(self, data, expected=False):
    """Report the alerts contained in ``data['alerts']`` and raise on errors.

    Alerts whose type is not ``error`` (compared case-insensitively) are
    reported as warnings, and so is every error except the last one. The
    last error is raised as an ExtractorError.

    data -- response dict; a missing or non-list ``alerts`` entry is ignored
    expected -- passed to ExtractorError as its ``expected`` flag
    """

    def _real_extract_alerts():
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # try_get also covers alert values that are not dicts
                alert_type = try_get(alert, lambda x: x['type'], compat_str)
                if not alert_type:
                    continue
                message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
                for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                    # A run without text must not break the concatenation
                    message += try_get(run, lambda x: x['text'], compat_str) or ''
                # Yield exactly once per alert so that nothing is reported twice
                if message:
                    yield alert_type, message

    errors = []
    warnings = []
    for alert_type, alert_message in _real_extract_alerts():
        if alert_type.lower() == 'error':
            errors.append([alert_type, alert_message])
        else:
            warnings.append([alert_type, alert_message])

    for alert_type, alert_message in (warnings + errors[:-1]):
        self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if errors:
        raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3366
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when ``data`` has no playlist sidebar. Otherwise returns
    the result of a non-fatal API request, which is None on failure. When
    the button is not found, the request is still made with the default
    ``params`` and a ``VL``-prefixed browse id.
    """
    sidebar_renderer = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_renderer:
        return
    browse_id = params = None
    for item in sidebar_renderer:
        if not isinstance(item, dict):
            continue
        renderer = item.get('playlistSidebarPrimaryInfoRenderer')
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    headers = self._generate_api_headers(
        # The other callers in this class derive the sync id from the page
        # data, not from ytcfg; do the same here
        # NOTE(review): _extract_account_syncid is defined elsewhere - confirm
        # that it expects the page data
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3410
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint ``ep`` and retry on flaky or incomplete answers.

    A request is repeated, up to the ``extractor_retries`` parameter
    (default 3), after an HTTP 500/503/404 error or when the response has
    none of the keys in ``check_get_keys``. Alerts in a response are
    handled by ``_extract_alerts``.

    Returns the response. When ``fatal`` is false, failures are reported
    as warnings and None is returned instead of raising.
    """
    if check_get_keys is None:
        check_get_keys = []
    max_retries = self._downloader.params.get('extractor_retries', 3)
    response = last_error = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, retry_suffix))
        except ExtractorError as err:
            cause = err.cause
            if isinstance(cause, compat_HTTPError) and cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % cause.code
                if attempt < max_retries:
                    continue
            if not fatal:
                self.report_warning(error_to_compat_str(err))
                return
            raise

        # Youtube may send alerts if there was an issue with the continuation page
        self._extract_alerts(response, expected=False)
        if not check_get_keys or dict_get(response, check_get_keys):
            return response
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if attempt < max_retries:
            continue
        if fatal:
            raise ExtractorError(last_error)
        self.report_warning(last_error)
        return
    return response
3458
def _extract_webpage(self, url, item_id):
    """Download ``url`` and return ``(webpage, data)``.

    ``data`` is the initial data extracted from the page. The download is
    repeated, up to the ``extractor_retries`` parameter (default 3), until
    the data has ``contents`` or ``currentVideoEndpoint``. If it never
    does, ExtractorError is raised. Alerts in the data are handled as
    expected errors.
    """
    max_retries = self._downloader.params.get('extractor_retries', 3)
    last_error = 'Incomplete yt initial data recieved'
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if attempt:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        webpage = self._download_webpage(
            url, item_id, 'Downloading webpage%s' % retry_suffix)
        data = self._extract_yt_initial_data(item_id, webpage)
        self._extract_alerts(data, expected=True)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            return webpage, data
        if attempt >= max_retries:
            raise ExtractorError(last_error)
    return webpage, data
3479
def _real_extract(self, url):
    """Extract a tab, playlist or single-video page.

    The URL is normalised to www.youtube.com. A bare channel/user URL is
    redirected to its ``/videos`` tab, and a watch URL without a video id
    is turned into the URL of its playlist. The downloaded page is then
    dispatched by what it contains: tabs, a playlist panel, or a single
    video.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))

    # This is not matched in a channel page with a tab selected
    bare_match = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    url_parts = bare_match.groupdict() if bare_match else {}
    not_channel = url_parts.get('not_channel')
    if url_parts and not not_channel:
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (url_parts.get('pre'), url_parts.get('post') or '')

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (qs.get(param, [None])[0] for param in ('v', 'list'))

    if not video_id and (not_channel or '').startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    reloaded = self._reload_with_unavailable_videos(item_id, data, webpage)
    data = reloaded or data

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    current_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str)
    video_id = current_video_id or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 3536
c5e8d7af 3537
class YoutubePlaylistIE(InfoExtractor):
    # Matches a bare playlist id, or a youtube/invidio.us URL with a
    # "list" query parameter; the actual extraction is done by YoutubeTabIE
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs YoutubeTabIE accepts and URLs that carry a video id."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Hand the playlist over to YoutubeTabIE, keeping the query string."""
        playlist_id = self._match_id(url)
        # A bare playlist id has no query string of its own
        query = parse_qs(url) or {'list': playlist_id}
        tab_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3619
3620
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a playlist id
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link as a full watch URL for YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3659
3660
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" shortcut for a user page
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the user's page, to be handled by YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3674
b05654f0 3675
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" style shortcuts; they resolve to the playlist with id "LL"
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the "LL" playlist, to be handled by YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3693
3694
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    # Sent as "params" with the search request when set
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` videos found for ``query``, page by page."""
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        for page_num in itertools.count(1):
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                return
            # The first page and the continuation pages nest the results differently
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                section_items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list) or []
                for item in section_items:
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            request['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Return a playlist, titled after the query, of up to ``n`` results."""
        results = self._entries(query, n)
        return self.playlist_result(results, query)
75dff0ee 3764
c9ae7b95 3765
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as the parent class, with a fixed "params" value
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3771
c9ae7b95 3772
class YoutubeSearchURLIE(YoutubeSearchIE):
    # Handles result-page URLs instead of a search keyword
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return the URL pattern defined on this class."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by the ``search_query``/``q`` and ``sp`` parameters."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # The "sp" value of the URL is stored as the search params on this instance
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3798
3799
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    A subclass has to provide the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name, derived from the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before extracting."""
        self._login()

    def _real_extract(self, url):
        """Redirect to the feed page, to be handled by YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3819
3820
class YoutubeWatchLaterIE(InfoExtractor):
    # ":ytwatchlater" shortcut; it resolves to the playlist with id "WL"
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the "WL" playlist, to be handled by YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3833
3834
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com front page as well as the ":ytrec" shortcuts
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3849
1ed5b5c9 3850
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # ":ytsubs" style shortcuts for the subscriptions feed
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3862
1ed5b5c9 3863
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # ":ythis" / ":ythistory" shortcuts for the history feed
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3872
3873
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that end before any video id
    # and answers them with a hint about shell quoting
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with an expected error explaining how to quote the URL."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3921
3922
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose video id is 1 to 10 characters long
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with an expected error naming the incomplete id."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)