]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
Add field `name` for subtitles
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Parse the query component of *url* with ``compat_urlparse.parse_qs``."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
de7f3446 60class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
61 """Provide base functions for Youtube extractors"""
62 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
9303ce3e 63 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
e00eb564
S
64
65 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
3995d37d
S
66 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
67 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
e00eb564 68
3462ffa8 69 _RESERVED_NAMES = (
cd7c66cf 70 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
68b91dc9 71 r'movies|results|shared|hashtag|trending|feed|feeds|oembed|'
cd7c66cf 72 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 73
b2e8bc1b
JMF
74 _NETRC_MACHINE = 'youtube'
75 # If True it will raise an error if no login info is provided
76 _LOGIN_REQUIRED = False
77
70d5c17b 78 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 79
b2e8bc1b 80 def _login(self):
83317f69 81 """
82 Attempt to log in to YouTube.
83 True is returned if successful or skipped.
84 False is returned if login failed.
85
86 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
87 """
68217024 88 username, password = self._get_login_info()
b2e8bc1b
JMF
89 # No authentication to be performed
90 if username is None:
70d35d16 91 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
69ea8ca4 92 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
545cc85d 93 # if self._downloader.params.get('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
94 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 95 return True
b2e8bc1b 96
7cc3570e
PH
97 login_page = self._download_webpage(
98 self._LOGIN_URL, None,
69ea8ca4
PH
99 note='Downloading login page',
100 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
101 if login_page is False:
102 return
b2e8bc1b 103
1212e997 104 login_form = self._hidden_inputs(login_page)
c5e8d7af 105
e00eb564
S
106 def req(url, f_req, note, errnote):
107 data = login_form.copy()
108 data.update({
109 'pstMsg': 1,
110 'checkConnection': 'youtube',
111 'checkedDomains': 'youtube',
112 'hl': 'en',
113 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 114 'f.req': json.dumps(f_req),
e00eb564
S
115 'flowName': 'GlifWebSignIn',
116 'flowEntry': 'ServiceLogin',
baf67a60
S
117 # TODO: reverse actual botguard identifier generation algo
118 'bgRequest': '["identifier",""]',
041bc3ad 119 })
e00eb564
S
120 return self._download_json(
121 url, None, note=note, errnote=errnote,
122 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
123 fatal=False,
124 data=urlencode_postdata(data), headers={
125 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
126 'Google-Accounts-XSRF': 1,
127 })
128
3995d37d 129 def warn(message):
6a39ee13 130 self.report_warning(message)
3995d37d
S
131
132 lookup_req = [
133 username,
134 None, [], None, 'US', None, None, 2, False, True,
135 [
136 None, None,
137 [2, 1, None, 1,
138 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
139 None, [], 4],
140 1, [None, None, []], None, None, None, True
141 ],
142 username,
143 ]
144
e00eb564 145 lookup_results = req(
3995d37d 146 self._LOOKUP_URL, lookup_req,
e00eb564
S
147 'Looking up account info', 'Unable to look up account info')
148
149 if lookup_results is False:
150 return False
041bc3ad 151
3995d37d
S
152 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
153 if not user_hash:
154 warn('Unable to extract user hash')
155 return False
156
157 challenge_req = [
158 user_hash,
159 None, 1, None, [1, None, None, None, [password, None, True]],
160 [
161 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
162 1, [None, None, []], None, None, None, True
163 ]]
83317f69 164
3995d37d
S
165 challenge_results = req(
166 self._CHALLENGE_URL, challenge_req,
167 'Logging in', 'Unable to log in')
83317f69 168
3995d37d 169 if challenge_results is False:
e00eb564 170 return
83317f69 171
3995d37d
S
172 login_res = try_get(challenge_results, lambda x: x[0][5], list)
173 if login_res:
174 login_msg = try_get(login_res, lambda x: x[5], compat_str)
175 warn(
176 'Unable to login: %s' % 'Invalid password'
177 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
178 return False
179
180 res = try_get(challenge_results, lambda x: x[0][-1], list)
181 if not res:
182 warn('Unable to extract result entry')
183 return False
184
9a6628aa
S
185 login_challenge = try_get(res, lambda x: x[0][0], list)
186 if login_challenge:
187 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
188 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
189 # SEND_SUCCESS - TFA code has been successfully sent to phone
190 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 191 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
192 if status == 'QUOTA_EXCEEDED':
193 warn('Exceeded the limit of TFA codes, try later')
194 return False
195
196 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
197 if not tl:
198 warn('Unable to extract TL')
199 return False
200
201 tfa_code = self._get_tfa_info('2-step verification code')
202
203 if not tfa_code:
204 warn(
205 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
206 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
207 return False
208
209 tfa_code = remove_start(tfa_code, 'G-')
210
211 tfa_req = [
212 user_hash, None, 2, None,
213 [
214 9, None, None, None, None, None, None, None,
215 [None, tfa_code, True, 2]
216 ]]
217
218 tfa_results = req(
219 self._TFA_URL.format(tl), tfa_req,
220 'Submitting TFA code', 'Unable to submit TFA code')
221
222 if tfa_results is False:
223 return False
224
225 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
226 if tfa_res:
227 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
228 warn(
229 'Unable to finish TFA: %s' % 'Invalid TFA code'
230 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
231 return False
232
233 check_cookie_url = try_get(
234 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
235 else:
236 CHALLENGES = {
237 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
238 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
239 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
240 }
241 challenge = CHALLENGES.get(
242 challenge_str,
243 '%s returned error %s.' % (self.IE_NAME, challenge_str))
244 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
245 return False
3995d37d
S
246 else:
247 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
248
249 if not check_cookie_url:
250 warn('Unable to extract CheckCookie URL')
251 return False
e00eb564
S
252
253 check_cookie_results = self._download_webpage(
3995d37d
S
254 check_cookie_url, None, 'Checking cookie', fatal=False)
255
256 if check_cookie_results is False:
257 return False
e00eb564 258
3995d37d
S
259 if 'https://myaccount.google.com/' not in check_cookie_results:
260 warn('Unable to log in')
b2e8bc1b 261 return False
e00eb564 262
b2e8bc1b
JMF
263 return True
264
cce889b9 265 def _initialize_consent(self):
266 cookies = self._get_cookies('https://www.youtube.com/')
267 if cookies.get('__Secure-3PSID'):
268 return
269 consent_id = None
270 consent = cookies.get('CONSENT')
271 if consent:
272 if 'YES' in consent.value:
273 return
274 consent_id = self._search_regex(
275 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
276 if not consent_id:
277 consent_id = random.randint(100, 999)
278 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 279
b2e8bc1b 280 def _real_initialize(self):
cce889b9 281 self._initialize_consent()
b2e8bc1b
JMF
282 if self._downloader is None:
283 return
b2e8bc1b
JMF
284 if not self._login():
285 return
c5e8d7af 286
f4f751af 287 _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
288 _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
a0566bbf 289 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 290 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
291 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 292
a5c56234
M
293 def _generate_sapisidhash_header(self):
294 sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
295 if sapisid_cookie is None:
296 return
297 time_now = round(time.time())
298 sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
299 return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)
300
301 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 302 note='Downloading API JSON', errnote='Unable to download API page',
303 context=None, api_key=None):
304
305 data = {'context': context} if context else {'context': self._extract_context()}
8bdd16b4 306 data.update(query)
f4f751af 307 real_headers = self._generate_api_headers()
308 real_headers.update({'content-type': 'application/json'})
309 if headers:
310 real_headers.update(headers)
545cc85d 311 return self._download_json(
a5c56234
M
312 'https://www.youtube.com/youtubei/v1/%s' % ep,
313 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 314 data=json.dumps(data).encode('utf8'), headers=real_headers,
315 query={'key': api_key or self._extract_api_key()})
316
317 def _extract_api_key(self, ytcfg=None):
318 return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY
c54f4aad 319
8bdd16b4 320 def _extract_yt_initial_data(self, video_id, webpage):
321 return self._parse_json(
322 self._search_regex(
29f7c58a 323 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 324 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 325 video_id)
0c148415 326
a1c5d2ca
M
327 def _extract_identity_token(self, webpage, item_id):
328 ytcfg = self._extract_ytcfg(item_id, webpage)
329 if ytcfg:
330 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
331 if token:
332 return token
333 return self._search_regex(
334 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
335 'identity token', default=None)
336
337 @staticmethod
338 def _extract_account_syncid(data):
8ea3f7b9 339 """
340 Extract syncId required to download private playlists of secondary channels
341 @param data Either response or ytcfg
342 """
343 sync_ids = (try_get(
344 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
345 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
a1c5d2ca
M
346 if len(sync_ids) >= 2 and sync_ids[1]:
347 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
348 # and just "user_syncid||" for primary channel. We only want the channel_syncid
349 return sync_ids[0]
8ea3f7b9 350 # ytcfg includes channel_syncid if on secondary channel
351 return data.get('DELEGATED_SESSION_ID')
a1c5d2ca 352
29f7c58a 353 def _extract_ytcfg(self, video_id, webpage):
8c54a305 354 if not webpage:
355 return {}
29f7c58a 356 return self._parse_json(
357 self._search_regex(
358 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 359 default='{}'), video_id, fatal=False) or {}
360
361 def __extract_client_version(self, ytcfg):
362 return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION
363
364 def _extract_context(self, ytcfg=None):
365 context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
366 if context:
367 return context
368
369 # Recreate the client context (required)
370 client_version = self.__extract_client_version(ytcfg)
371 client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
372 context = {
373 'client': {
374 'clientName': client_name,
375 'clientVersion': client_version,
376 }
377 }
378 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
379 if visitor_data:
380 context['client']['visitorData'] = visitor_data
381 return context
382
383 def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
384 headers = {
385 'X-YouTube-Client-Name': '1',
386 'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
387 }
388 if identity_token:
389 headers['x-youtube-identity-token'] = identity_token
390 if account_syncid:
391 headers['X-Goog-PageId'] = account_syncid
392 headers['X-Goog-AuthUser'] = 0
393 if visitor_data:
394 headers['x-goog-visitor-id'] = visitor_data
395 auth = self._generate_sapisidhash_header()
396 if auth is not None:
397 headers['Authorization'] = auth
398 headers['X-Origin'] = 'https://www.youtube.com'
399 return headers
29f7c58a 400
30a074c2 401 def _extract_video(self, renderer):
402 video_id = renderer.get('videoId')
403 title = try_get(
404 renderer,
405 (lambda x: x['title']['runs'][0]['text'],
406 lambda x: x['title']['simpleText']), compat_str)
407 description = try_get(
408 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
409 compat_str)
410 duration = parse_duration(try_get(
411 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
412 view_count_text = try_get(
413 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
414 view_count = str_to_int(self._search_regex(
415 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
416 'view count', default=None))
417 uploader = try_get(
bc2ca1bb 418 renderer,
419 (lambda x: x['ownerText']['runs'][0]['text'],
420 lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
30a074c2 421 return {
39ed931e 422 '_type': 'url',
30a074c2 423 'ie_key': YoutubeIE.ie_key(),
424 'id': video_id,
425 'url': video_id,
426 'title': title,
427 'description': description,
428 'duration': duration,
429 'view_count': view_count,
430 'uploader': uploader,
431 }
432
0c148415 433
360e1ca5 434class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 435 IE_DESC = 'YouTube.com'
bc2ca1bb 436 _INVIDIOUS_SITES = (
437 # invidious-redirect websites
438 r'(?:www\.)?redirect\.invidious\.io',
439 r'(?:(?:www|dev)\.)?invidio\.us',
440 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
441 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 442 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 443 r'(?:(?:www|au)\.)?ytprivate\.com',
444 r'(?:www\.)?invidious\.namazso\.eu',
445 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 446 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
447 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
448 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
449 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
450 # youtube-dl invidious instances list
451 r'(?:(?:www|no)\.)?invidiou\.sh',
452 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
453 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 454 r'(?:www\.)?invidious\.mastodon\.host',
455 r'(?:www\.)?invidious\.zapashcanon\.fr',
456 r'(?:www\.)?invidious\.kavin\.rocks',
201c1459 457 r'(?:www\.)?invidious\.tinfoil-hat\.net',
458 r'(?:www\.)?invidious\.himiko\.cloud',
459 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 460 r'(?:www\.)?invidious\.tube',
461 r'(?:www\.)?invidiou\.site',
462 r'(?:www\.)?invidious\.site',
463 r'(?:www\.)?invidious\.xyz',
464 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 465 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 466 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 467 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 468 r'(?:www\.)?tube\.poal\.co',
469 r'(?:www\.)?tube\.connect\.cafe',
470 r'(?:www\.)?vid\.wxzm\.sx',
471 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 472 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 473 r'(?:www\.)?yewtu\.be',
474 r'(?:www\.)?yt\.elukerio\.org',
475 r'(?:www\.)?yt\.lelux\.fi',
476 r'(?:www\.)?invidious\.ggc-project\.de',
477 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 478 r'(?:www\.)?ytprivate\.com',
479 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 480 r'(?:www\.)?invidious\.toot\.koeln',
481 r'(?:www\.)?invidious\.fdn\.fr',
482 r'(?:www\.)?watch\.nettohikari\.com',
483 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
484 r'(?:www\.)?qklhadlycap4cnod\.onion',
485 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
486 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
487 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
488 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
489 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
490 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
491 )
cb7dfeea 492 _VALID_URL = r"""(?x)^
c5e8d7af 493 (
edb53e2d 494 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 495 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
496 (?:www\.)?deturl\.com/www\.youtube\.com|
497 (?:www\.)?pwnyoutube\.com|
498 (?:www\.)?hooktube\.com|
499 (?:www\.)?yourepeat\.com|
500 tube\.majestyc\.net|
501 %(invidious)s|
502 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
503 (?:.*?\#/)? # handle anchor (#/) redirect urls
504 (?: # the various things that can precede the ID:
ac7553d0 505 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 506 |(?: # or the v= param in all its forms
f7000f3a 507 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 508 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 509 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
510 v=
511 )
f4b05232 512 ))
cbaed4bb
S
513 |(?:
514 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
515 vid\.plus| # or vid.plus/xxxx
516 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 517 %(invidious)s
cbaed4bb 518 )/
edb53e2d 519 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 520 )
c5e8d7af 521 )? # all until now is optional -> you can pass the naked ID
201c1459 522 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 523 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 524 $""" % {
bc2ca1bb 525 'invidious': '|'.join(_INVIDIOUS_SITES),
526 }
e40c758c 527 _PLAYER_INFO_RE = (
cc2db878 528 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
529 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 530 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 531 )
2c62dc26 532 _formats = {
c2d3cb4c 533 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
534 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
535 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
536 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
537 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
538 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
539 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
540 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 541 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 542 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
543 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
544 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
545 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
546 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
547 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 548 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 549 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
550 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 551
552
553 # 3D videos
c2d3cb4c 554 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
555 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
556 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
557 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 558 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
559 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
560 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 561
96fb5605 562 # Apple HTTP Live Streaming
11f12195 563 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 564 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
565 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
566 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
567 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
568 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 569 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
570 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
571
572 # DASH mp4 video
d23028a8
S
573 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
574 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
575 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
576 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
577 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 578 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
579 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
580 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
581 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
582 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
583 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
584 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 585
f6f1fc92 586 # Dash mp4 audio
d23028a8
S
587 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
588 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
589 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
590 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
591 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
592 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
593 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
594
595 # Dash webm
d23028a8
S
596 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
597 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
598 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
599 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
600 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
601 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
602 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
603 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
604 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
605 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
606 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
607 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
608 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
609 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
610 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 611 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
612 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
613 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
614 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
615 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
616 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
617 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
618
619 # Dash webm audio
d23028a8
S
620 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
621 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 622
0857baad 623 # Dash webm audio with opus inside
d23028a8
S
624 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
625 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
626 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 627
ce6b9a2d
PH
628 # RTMP (unnamed)
629 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
630
631 # av01 video only formats sometimes served with "unknown" codecs
632 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
633 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
634 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
635 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 636 }
29f7c58a 637 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 638
fd5c4aab
S
639 _GEO_BYPASS = False
640
78caa52a 641 IE_NAME = 'youtube'
2eb88d95
PH
642 _TESTS = [
643 {
2d3d2997 644 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
645 'info_dict': {
646 'id': 'BaW_jenozKc',
647 'ext': 'mp4',
3867038a 648 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
649 'uploader': 'Philipp Hagemeister',
650 'uploader_id': 'phihag',
ec85ded8 651 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
652 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
653 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 654 'upload_date': '20121002',
3867038a 655 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 656 'categories': ['Science & Technology'],
3867038a 657 'tags': ['youtube-dl'],
556dbe7f 658 'duration': 10,
dbdaaa23 659 'view_count': int,
3e7c1224
PH
660 'like_count': int,
661 'dislike_count': int,
7c80519c 662 'start_time': 1,
297a564b 663 'end_time': 9,
2eb88d95 664 }
0e853ca4 665 },
fccd3771 666 {
4bc3a23e
PH
667 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
668 'note': 'Embed-only video (#1746)',
669 'info_dict': {
670 'id': 'yZIXLfi8CZQ',
671 'ext': 'mp4',
672 'upload_date': '20120608',
673 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
674 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
675 'uploader': 'SET India',
94bfcd23 676 'uploader_id': 'setindia',
ec85ded8 677 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 678 'age_limit': 18,
545cc85d 679 },
680 'skip': 'Private video',
fccd3771 681 },
11b56058 682 {
8bdd16b4 683 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
684 'note': 'Use the first video ID in the URL',
685 'info_dict': {
686 'id': 'BaW_jenozKc',
687 'ext': 'mp4',
3867038a 688 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
689 'uploader': 'Philipp Hagemeister',
690 'uploader_id': 'phihag',
ec85ded8 691 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 692 'upload_date': '20121002',
3867038a 693 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 694 'categories': ['Science & Technology'],
3867038a 695 'tags': ['youtube-dl'],
556dbe7f 696 'duration': 10,
dbdaaa23 697 'view_count': int,
11b56058
PM
698 'like_count': int,
699 'dislike_count': int,
34a7de29
S
700 },
701 'params': {
702 'skip_download': True,
703 },
11b56058 704 },
dd27fd17 705 {
2d3d2997 706 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
707 'note': '256k DASH audio (format 141) via DASH manifest',
708 'info_dict': {
709 'id': 'a9LDPn-MO4I',
710 'ext': 'm4a',
711 'upload_date': '20121002',
712 'uploader_id': '8KVIDEO',
ec85ded8 713 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
714 'description': '',
715 'uploader': '8KVIDEO',
716 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 717 },
4bc3a23e
PH
718 'params': {
719 'youtube_include_dash_manifest': True,
720 'format': '141',
4919603f 721 },
de3c7fe0 722 'skip': 'format 141 not served anymore',
dd27fd17 723 },
8bdd16b4 724 # DASH manifest with encrypted signature
725 {
726 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
727 'info_dict': {
728 'id': 'IB3lcPjvWLA',
729 'ext': 'm4a',
730 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
731 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
732 'duration': 244,
733 'uploader': 'AfrojackVEVO',
734 'uploader_id': 'AfrojackVEVO',
735 'upload_date': '20131011',
cc2db878 736 'abr': 129.495,
8bdd16b4 737 },
738 'params': {
739 'youtube_include_dash_manifest': True,
740 'format': '141/bestaudio[ext=m4a]',
741 },
742 },
aa79ac0c
PH
743 # Controversy video
744 {
745 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
746 'info_dict': {
747 'id': 'T4XJQO3qol8',
748 'ext': 'mp4',
556dbe7f 749 'duration': 219,
aa79ac0c 750 'upload_date': '20100909',
4fe54c12 751 'uploader': 'Amazing Atheist',
aa79ac0c 752 'uploader_id': 'TheAmazingAtheist',
ec85ded8 753 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 754 'title': 'Burning Everyone\'s Koran',
545cc85d 755 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 756 }
c522adb1 757 },
dd2d55f1 758 # Normal age-gate video (embed allowed)
c522adb1 759 {
2d3d2997 760 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
761 'info_dict': {
762 'id': 'HtVdAasjOgU',
763 'ext': 'mp4',
764 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 765 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 766 'duration': 142,
c522adb1
JMF
767 'uploader': 'The Witcher',
768 'uploader_id': 'WitcherGame',
ec85ded8 769 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 770 'upload_date': '20140605',
34952f09 771 'age_limit': 18,
c522adb1
JMF
772 },
773 },
8bdd16b4 774 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
775 # YouTube Red ad is not captured for creator
776 {
777 'url': '__2ABJjxzNo',
778 'info_dict': {
779 'id': '__2ABJjxzNo',
780 'ext': 'mp4',
781 'duration': 266,
782 'upload_date': '20100430',
783 'uploader_id': 'deadmau5',
784 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 785 'creator': 'deadmau5',
786 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 787 'uploader': 'deadmau5',
788 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 789 'alt_title': 'Some Chords',
8bdd16b4 790 },
791 'expected_warnings': [
792 'DASH manifest missing',
793 ]
794 },
067aa17e 795 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
796 {
797 'url': 'lqQg6PlCWgI',
798 'info_dict': {
799 'id': 'lqQg6PlCWgI',
800 'ext': 'mp4',
556dbe7f 801 'duration': 6085,
90227264 802 'upload_date': '20150827',
cbe2bd91 803 'uploader_id': 'olympic',
ec85ded8 804 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 805 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 806 'uploader': 'Olympic',
cbe2bd91
PH
807 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
808 },
809 'params': {
810 'skip_download': 'requires avconv',
e52a40ab 811 }
cbe2bd91 812 },
6271f1ca
PH
813 # Non-square pixels
814 {
815 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
816 'info_dict': {
817 'id': '_b-2C3KPAM0',
818 'ext': 'mp4',
819 'stretched_ratio': 16 / 9.,
556dbe7f 820 'duration': 85,
6271f1ca
PH
821 'upload_date': '20110310',
822 'uploader_id': 'AllenMeow',
ec85ded8 823 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 824 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 825 'uploader': '孫ᄋᄅ',
6271f1ca
PH
826 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
827 },
06b491eb
S
828 },
829 # url_encoded_fmt_stream_map is empty string
830 {
831 'url': 'qEJwOuvDf7I',
832 'info_dict': {
833 'id': 'qEJwOuvDf7I',
f57b7835 834 'ext': 'webm',
06b491eb
S
835 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
836 'description': '',
837 'upload_date': '20150404',
838 'uploader_id': 'spbelect',
839 'uploader': 'Наблюдатели Петербурга',
840 },
841 'params': {
842 'skip_download': 'requires avconv',
e323cf3f
S
843 },
844 'skip': 'This live event has ended.',
06b491eb 845 },
067aa17e 846 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
847 {
848 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
849 'info_dict': {
850 'id': 'FIl7x6_3R5Y',
eb6793ba 851 'ext': 'webm',
da77d856
S
852 'title': 'md5:7b81415841e02ecd4313668cde88737a',
853 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 854 'duration': 220,
da77d856
S
855 'upload_date': '20150625',
856 'uploader_id': 'dorappi2000',
ec85ded8 857 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 858 'uploader': 'dorappi2000',
eb6793ba 859 'formats': 'mincount:31',
da77d856 860 },
eb6793ba 861 'skip': 'not actual anymore',
2ee8f5d8 862 },
8a1a26ce
YCH
863 # DASH manifest with segment_list
864 {
865 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
866 'md5': '8ce563a1d667b599d21064e982ab9e31',
867 'info_dict': {
868 'id': 'CsmdDsKjzN8',
869 'ext': 'mp4',
17ee98e1 870 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
871 'uploader': 'Airtek',
872 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
873 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
874 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
875 },
876 'params': {
877 'youtube_include_dash_manifest': True,
878 'format': '135', # bestvideo
be49068d
S
879 },
880 'skip': 'This live event has ended.',
2ee8f5d8 881 },
cf7e015f
S
882 {
883 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 884 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 885 'info_dict': {
545cc85d 886 'id': 'jvGDaLqkpTg',
887 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
888 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
889 },
890 'playlist': [{
891 'info_dict': {
545cc85d 892 'id': 'jvGDaLqkpTg',
cf7e015f 893 'ext': 'mp4',
545cc85d 894 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
895 'description': 'md5:e03b909557865076822aa169218d6a5d',
896 'duration': 10643,
897 'upload_date': '20161111',
898 'uploader': 'Team PGP',
899 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
900 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
901 },
902 }, {
903 'info_dict': {
545cc85d 904 'id': '3AKt1R1aDnw',
cf7e015f 905 'ext': 'mp4',
545cc85d 906 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
907 'description': 'md5:e03b909557865076822aa169218d6a5d',
908 'duration': 10991,
909 'upload_date': '20161111',
910 'uploader': 'Team PGP',
911 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
912 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
913 },
914 }, {
915 'info_dict': {
545cc85d 916 'id': 'RtAMM00gpVc',
cf7e015f 917 'ext': 'mp4',
545cc85d 918 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
919 'description': 'md5:e03b909557865076822aa169218d6a5d',
920 'duration': 10995,
921 'upload_date': '20161111',
922 'uploader': 'Team PGP',
923 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
924 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
925 },
926 }, {
927 'info_dict': {
545cc85d 928 'id': '6N2fdlP3C5U',
cf7e015f 929 'ext': 'mp4',
545cc85d 930 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
931 'description': 'md5:e03b909557865076822aa169218d6a5d',
932 'duration': 10990,
933 'upload_date': '20161111',
934 'uploader': 'Team PGP',
935 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
936 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
937 },
938 }],
939 'params': {
940 'skip_download': True,
941 },
cbaed4bb 942 },
f9f49d87 943 {
067aa17e 944 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
945 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
946 'info_dict': {
947 'id': 'gVfLd0zydlo',
948 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
949 },
950 'playlist_count': 2,
be49068d 951 'skip': 'Not multifeed anymore',
f9f49d87 952 },
cbaed4bb 953 {
2d3d2997 954 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 955 'only_matching': True,
0e49d9a6 956 },
6d4fc66b 957 {
2d3d2997 958 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
959 'only_matching': True,
960 },
0e49d9a6 961 {
067aa17e 962 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 963 # Also tests cut-off URL expansion in video description (see
067aa17e
S
964 # https://github.com/ytdl-org/youtube-dl/issues/1892,
965 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
966 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
967 'info_dict': {
968 'id': 'lsguqyKfVQg',
969 'ext': 'mp4',
970 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 971 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 972 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 973 'duration': 133,
0e49d9a6
LL
974 'upload_date': '20151119',
975 'uploader_id': 'IronSoulElf',
ec85ded8 976 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 977 'uploader': 'IronSoulElf',
eb6793ba
S
978 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
979 'track': 'Dark Walk - Position Music',
980 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 981 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
982 },
983 'params': {
984 'skip_download': True,
985 },
986 },
61f92af1 987 {
067aa17e 988 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
989 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
990 'only_matching': True,
991 },
313dfc45
LL
992 {
993 # Video with yt:stretch=17:0
994 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
995 'info_dict': {
996 'id': 'Q39EVAstoRM',
997 'ext': 'mp4',
998 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
999 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1000 'upload_date': '20151107',
1001 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1002 'uploader': 'CH GAMER DROID',
1003 },
1004 'params': {
1005 'skip_download': True,
1006 },
be49068d 1007 'skip': 'This video does not exist.',
313dfc45 1008 },
201c1459 1009 {
1010 # Video with incomplete 'yt:stretch=16:'
1011 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1012 'only_matching': True,
1013 },
7caf9830
S
1014 {
1015 # Video licensed under Creative Commons
1016 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1017 'info_dict': {
1018 'id': 'M4gD1WSo5mA',
1019 'ext': 'mp4',
1020 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1021 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1022 'duration': 721,
7caf9830
S
1023 'upload_date': '20150127',
1024 'uploader_id': 'BerkmanCenter',
ec85ded8 1025 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1026 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1027 'license': 'Creative Commons Attribution license (reuse allowed)',
1028 },
1029 'params': {
1030 'skip_download': True,
1031 },
1032 },
fd050249
S
1033 {
1034 # Channel-like uploader_url
1035 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1036 'info_dict': {
1037 'id': 'eQcmzGIKrzg',
1038 'ext': 'mp4',
1039 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1040 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1041 'duration': 4060,
fd050249 1042 'upload_date': '20151119',
eb6793ba 1043 'uploader': 'Bernie Sanders',
fd050249 1044 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1045 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1046 'license': 'Creative Commons Attribution license (reuse allowed)',
1047 },
1048 'params': {
1049 'skip_download': True,
1050 },
1051 },
040ac686
S
1052 {
1053 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1054 'only_matching': True,
7f29cf54
S
1055 },
1056 {
067aa17e 1057 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1058 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1059 'only_matching': True,
6496ccb4
S
1060 },
1061 {
1062 # Rental video preview
1063 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1064 'info_dict': {
1065 'id': 'uGpuVWrhIzE',
1066 'ext': 'mp4',
1067 'title': 'Piku - Trailer',
1068 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1069 'upload_date': '20150811',
1070 'uploader': 'FlixMatrix',
1071 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1072 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1073 'license': 'Standard YouTube License',
1074 },
1075 'params': {
1076 'skip_download': True,
1077 },
eb6793ba 1078 'skip': 'This video is not available.',
022a5d66 1079 },
12afdc2a
S
1080 {
1081 # YouTube Red video with episode data
1082 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1083 'info_dict': {
1084 'id': 'iqKdEhx-dD4',
1085 'ext': 'mp4',
1086 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1087 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1088 'duration': 2085,
12afdc2a
S
1089 'upload_date': '20170118',
1090 'uploader': 'Vsauce',
1091 'uploader_id': 'Vsauce',
1092 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1093 'series': 'Mind Field',
1094 'season_number': 1,
1095 'episode_number': 1,
1096 },
1097 'params': {
1098 'skip_download': True,
1099 },
1100 'expected_warnings': [
1101 'Skipping DASH manifest',
1102 ],
1103 },
c7121fa7
S
1104 {
1105 # The following content has been identified by the YouTube community
1106 # as inappropriate or offensive to some audiences.
1107 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1108 'info_dict': {
1109 'id': '6SJNVb0GnPI',
1110 'ext': 'mp4',
1111 'title': 'Race Differences in Intelligence',
1112 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1113 'duration': 965,
1114 'upload_date': '20140124',
1115 'uploader': 'New Century Foundation',
1116 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1117 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1118 },
1119 'params': {
1120 'skip_download': True,
1121 },
545cc85d 1122 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1123 },
022a5d66
S
1124 {
1125 # itag 212
1126 'url': '1t24XAntNCY',
1127 'only_matching': True,
fd5c4aab
S
1128 },
1129 {
1130 # geo restricted to JP
1131 'url': 'sJL6WA-aGkQ',
1132 'only_matching': True,
1133 },
cd5a74a2
S
1134 {
1135 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1136 'only_matching': True,
1137 },
bc2ca1bb 1138 {
1139 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1140 'only_matching': True,
1141 },
1142 {
1143 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1144 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1145 'only_matching': True,
1146 },
825cd268
RA
1147 {
1148 # DRM protected
1149 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1150 'only_matching': True,
4fe54c12
S
1151 },
1152 {
1153 # Video with unsupported adaptive stream type formats
1154 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1155 'info_dict': {
1156 'id': 'Z4Vy8R84T1U',
1157 'ext': 'mp4',
1158 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1159 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1160 'duration': 433,
1161 'upload_date': '20130923',
1162 'uploader': 'Amelia Putri Harwita',
1163 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1164 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1165 'formats': 'maxcount:10',
1166 },
1167 'params': {
1168 'skip_download': True,
1169 'youtube_include_dash_manifest': False,
1170 },
5429d6a9 1171 'skip': 'not actual anymore',
5caabd3c 1172 },
1173 {
822b9d9c 1174 # Youtube Music Auto-generated description
5caabd3c 1175 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1176 'info_dict': {
1177 'id': 'MgNrAu2pzNs',
1178 'ext': 'mp4',
1179 'title': 'Voyeur Girl',
1180 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1181 'upload_date': '20190312',
5429d6a9
S
1182 'uploader': 'Stephen - Topic',
1183 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1184 'artist': 'Stephen',
1185 'track': 'Voyeur Girl',
1186 'album': 'it\'s too much love to know my dear',
1187 'release_date': '20190313',
1188 'release_year': 2019,
1189 },
1190 'params': {
1191 'skip_download': True,
1192 },
1193 },
66b48727
RA
1194 {
1195 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1196 'only_matching': True,
1197 },
011e75e6
S
1198 {
1199 # invalid -> valid video id redirection
1200 'url': 'DJztXj2GPfl',
1201 'info_dict': {
1202 'id': 'DJztXj2GPfk',
1203 'ext': 'mp4',
1204 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1205 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1206 'upload_date': '20090125',
1207 'uploader': 'Prochorowka',
1208 'uploader_id': 'Prochorowka',
1209 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1210 'artist': 'Panjabi MC',
1211 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1212 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1213 },
1214 'params': {
1215 'skip_download': True,
1216 },
545cc85d 1217 'skip': 'Video unavailable',
ea74e00b
DP
1218 },
1219 {
1220 # empty description results in an empty string
1221 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1222 'info_dict': {
1223 'id': 'x41yOUIvK2k',
1224 'ext': 'mp4',
1225 'title': 'IMG 3456',
1226 'description': '',
1227 'upload_date': '20170613',
1228 'uploader_id': 'ElevageOrVert',
1229 'uploader': 'ElevageOrVert',
1230 },
1231 'params': {
1232 'skip_download': True,
1233 },
1234 },
a0566bbf 1235 {
29f7c58a 1236 # with '};' inside yt initial data (see [1])
1237 # see [2] for an example with '};' inside ytInitialPlayerResponse
1238 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1239 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1240 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1241 'info_dict': {
1242 'id': 'CHqg6qOn4no',
1243 'ext': 'mp4',
1244 'title': 'Part 77 Sort a list of simple types in c#',
1245 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1246 'upload_date': '20130831',
1247 'uploader_id': 'kudvenkat',
1248 'uploader': 'kudvenkat',
1249 },
1250 'params': {
1251 'skip_download': True,
1252 },
1253 },
29f7c58a 1254 {
1255 # another example of '};' in ytInitialData
1256 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1257 'only_matching': True,
1258 },
1259 {
1260 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1261 'only_matching': True,
1262 },
{
    # https://github.com/ytdl-org/youtube-dl/pull/28094
    'url': 'OtqTfy26tG0',
    'info_dict': {
        'id': 'OtqTfy26tG0',
        'ext': 'mp4',
        'title': 'Burn Out',
        'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
        'upload_date': '20141120',
        'uploader': 'The Cinematic Orchestra - Topic',
        'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
        'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
        'artist': 'The Cinematic Orchestra',
        'track': 'Burn Out',
        'album': 'Every Day',
        # Was misspelled 'release_data', a key no extractor emits, so the
        # expectation was never checked against the real field
        'release_date': None,
        'release_year': None,
    },
    'params': {
        'skip_download': True,
    },
},
bc2ca1bb 1285 {
1286 # controversial video, only works with bpctr when authenticated with cookies
1287 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1288 'only_matching': True,
1289 },
f7ad7160 1290 {
1291 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1292 'url': 'cBvYw8_A0vQ',
1293 'info_dict': {
1294 'id': 'cBvYw8_A0vQ',
1295 'ext': 'mp4',
1296 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1297 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1298 'upload_date': '20201120',
1299 'uploader': 'Walk around Japan',
1300 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1301 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1302 },
1303 'params': {
1304 'skip_download': True,
1305 },
0fb983f6 1306 }, {
1307 # Has multiple audio streams
1308 'url': 'WaOKSUlf4TM',
1309 'only_matching': True
f7ad7160 1310 },
120916da 1311 {
1312 # multiple subtitles with same lang_code
1313 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1314 'only_matching': True,
1315 },
2eb88d95
PH
1316 ]
1317
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is decided by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1327
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and its per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # (player_url, signature shape) -> signature function,
    # filled by _decrypt_signature
    self._player_cache = {}
    # player id -> downloaded player code,
    # filled by _extract_signature_function
    self._code_cache = {}
e0df6211 1332
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature

    The signature is split on '.' and each part is replaced by its
    length, e.g. 'abc.de' gives '3.2'.
    """
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1336
e40c758c
S
1337 @classmethod
1338 def _extract_player_info(cls, player_url):
1339 for player_re in cls._PLAYER_INFO_RE:
1340 id_m = re.search(player_re, player_url)
1341 if id_m:
1342 break
1343 else:
c081b35c 1344 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1345 return id_m.group('id')
e40c758c
S
1346
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature for this player.

    The downloader cache section 'youtube-sigfuncs' is consulted first;
    a hit is a list of indices and is turned into a function that picks
    those characters from its argument. On a miss the player code is
    downloaded (once per player id, kept in self._code_cache), parsed
    with _parse_sig_js, and the index list it produces for a signature
    shaped like *example_sig* is stored in the cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    # The id doubles as a cache key; it must not contain path components
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        def apply_spec(s):
            return ''.join(s[i] for i in stored_spec)
        return apply_spec

    if player_id not in self._code_cache:
        player_code = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
        self._code_cache[player_id] = player_code
    res = self._parse_sig_js(self._code_cache[player_id])

    # Feed a string whose n-th character has code point n, so the output
    # characters spell out which input positions were picked
    probe = ''.join(compat_chr(n) for n in range(len(example_sig)))
    fresh_spec = list(map(ord, res(probe)))

    self._downloader.cache.store('youtube-sigfuncs', func_id, fresh_spec)
    return res
1373
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    *func* is applied to a probe string whose n-th character has code
    point n and which is as long as *example_sig*. The index list read
    back from the result is rendered as a sum of ``s[i]`` lookups and
    ``s[a:b:c]`` slices, then written out with to_screen.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # A negative stop would count from the end of the string, so
            # a backwards run that reaches index 0 is left open-ended
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # With fewer than two indices the pairwise loop below never runs
        # and ``i`` would be unbound afterwards; emit the lookups directly
        if len(idxs) < 2:
            for idx in idxs:
                yield 's[%d]' % idx
            return

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1412
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in player code and wrap it.

    The function name is taken from the 'sig' group of the first pattern
    below that matches *jscode*. The function is then extracted with
    JSInterpreter and returned as a one-argument callable.
    """
    # Tried in order. The tuple used to end with the same pattern twice;
    # the second copy could never match, so it has been dropped.
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(')
    funcname = self._search_regex(
        sig_name_patterns,
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
1436
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature

    *player_url* is made absolute first: protocol-relative URLs get an
    'https:' prefix and anything else that is not http(s) is joined onto
    https://www.youtube.com. The signature function is cached in
    self._player_cache per (player_url, signature shape).

    Raises ExtractorError when *player_url* is None, or, carrying the
    formatted traceback, when anything fails while extracting or applying
    the signature function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)
    try:
        cache_key = (player_url, self._signature_cache_id(s))
        sig_func = self._player_cache.get(cache_key)
        if sig_func is None:
            sig_func = self._extract_signature_function(
                video_id, player_url, s)
            self._player_cache[cache_key] = sig_func
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(),
            cause=e)
e0df6211 1463
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in *player_response*.

    Returns without doing anything when no valid
    playbackTracking/videostatsPlaybackUrl/baseUrl is present. The
    request is made with fatal=False.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(
        url_parts._replace(query=encoded_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1488
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in *webpage*.

    Returns a list holding, in this order: HTML-unescaped URLs of
    embedded players, HTML-unescaped ``data-youtube-id`` values of lazyYT
    elements, and ``data-video_id`` values of WordPress "YouTube Video
    Importer" players.
    """
    # Embedded YouTube player
    # The embedSWF alternative used to be `embedSWF\(?:\s*`, which needs
    # a literal ':' before the quoted URL and therefore never matched a
    # plain `embedSWF("...")` call. The ':' is now optional, so every
    # input that matched before still matches.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:?\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields (q1, q2, id) tuples; only the id is wanted
    entries.extend(m[-1] for m in matches)

    return entries
1520
@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``YoutubeIE._extract_urls(webpage)``, or None if it is empty."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1525
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return group 2 of ``cls._VALID_URL`` (matched in verbose mode) for *url*.

    Raises ExtractorError when *url* does not match the pattern.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1533
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a chapter list from the player-bar section of *data*.

    Returns None when *data* holds no non-empty chapter list. Otherwise
    returns a list of ``{'start_time', 'end_time', 'title'}`` dicts. Each
    chapter ends where the next one starts and the last one ends at
    *duration*. Chapters whose start or end time is unknown are left out.
    *video_id* is accepted but not used here.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_seconds(entry):
        # timeRangeStartMillis is only accepted as an int; scaled to seconds
        millis = try_get(
            entry,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    starts = [start_seconds(entry) for entry in raw_chapters]
    # end of chapter i == start of chapter i + 1; the final one uses duration
    ends = starts[1:] + [duration]

    result = []
    for entry, begin, finish in zip(raw_chapters, starts, ends):
        if begin is None or finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                entry, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1573
545cc85d 1574 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1575 return self._parse_json(self._search_regex(
1576 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1577 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1578
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the value of ``datetime_from_str('now-<X><units>')``, or None
    when time_text is empty/None or has fewer than three space-separated
    words.
    """
    # Callers may hand over None when the comment carries no time text
    if not time_text:
        return None
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    return datetime_from_str('now-%s%s' % (words[0], words[1]), precision='auto')
1588
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the non-empty ``'text'`` strings of the dicts in *runs*.

    Entries that are not dicts, or whose ``'text'`` is missing, empty or
    not a string, are ignored. Returns None when nothing was collected.
    """
    pieces = []
    for run in runs:
        if isinstance(run, dict):
            piece = try_get(run, lambda x: x['text'], compat_str)
            if piece:
                pieces.append(piece)
    return ''.join(pieces) if pieces else None
1602
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no ``commentId``. ``timestamp`` is
    None when the published-time text is absent or cannot be parsed by
    ``parse_time_text``. ``parent`` is the given parent id, or ``'root'``
    for top-level comments.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)

    # time_text is None without runs, and parse_time_text returns None for
    # short texts; either case used to raise AttributeError here.
    timestamp = None
    published = self.parse_time_text(time_text) if time_text else None
    if published is not None:
        timestamp = calendar.timegm(published.timetuple())

    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(
        comment_renderer,
        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(
        comment_renderer,
        (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']),
        compat_str)) or 0
    author_thumbnail = try_get(
        comment_renderer,
        lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_liked,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
1635
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generate comments by following continuation pages.

    Yields comment dicts produced by ``_extract_comment``. For the
    top-level call (``parent`` is None) the estimated total number of
    comments is additionally yielded once as a plain int, when the header
    provides one that parses to a number.

    root_continuation_data -- renderer the first continuation is read from
    session_token_list     -- one-element list holding the session token;
                              replaced in place whenever a response carries
                              a new ``xsrf_token``
    parent                 -- id of the comment whose replies are fetched,
                              None for the top-level listing
    comment_counts         -- shared list [comments so far, estimated
                              total, current reply thread number]

    Raises ExtractorError when a page still fails after the configured
    number of retries, or when the response reports an error. Stops
    silently on HTTP 413.
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type, not a second getter: it used to be
            # wrapped in a tuple with the lambda, which made try_get fall
            # back to dict(src) and skip the type check.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    # Only the top-level listing has a header with count and sort menu
    first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None

        # The loop is always left through `break`, `return` or an exception
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    estimated_total = str_to_int(expected_comment_count)
                    # Guard against header text that str_to_int cannot turn
                    # into a number: '%d' would raise and the progress
                    # counter would end up holding a non-int.
                    if estimated_total is not None:
                        comment_counts[1] = estimated_total
                        self.to_screen('Downloading ~%d comments' % estimated_total)
                        yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    # Restart from the re-sorted listing instead of consuming this page
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
1820
1821 def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
1822 """Entry for comment extraction"""
1823 comments = []
1824 known_entry_comment_renderers = (
1825 'itemSectionRenderer',
1826 )
1827 estimated_total = 0
1828 for entry in contents:
1829 for key, renderer in entry.items():
1830 if key not in known_entry_comment_renderers:
1831 continue
1832
1833 comment_iter = self._comment_entries(
1834 renderer,
1835 identity_token=self._extract_identity_token(webpage, item_id=video_id),
1836 account_syncid=self._extract_account_syncid(ytcfg),
f4f751af 1837 ytcfg=ytcfg,
a1c5d2ca
M
1838 session_token_list=[xsrf_token])
1839
1840 for comment in comment_iter:
1841 if isinstance(comment, int):
1842 estimated_total = comment
1843 continue
1844 comments.append(comment)
1845 break
d92f5d5a 1846 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
a1c5d2ca
M
1847 return {
1848 'comments': comments,
1849 'comment_count': len(comments),
1850 }
1851
c5e8d7af 1852 def _real_extract(self, url):
cf7e015f 1853 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1854 video_id = self._match_id(url)
1855 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1856 webpage_url = base_url + 'watch?v=' + video_id
1857 webpage = self._download_webpage(
cce889b9 1858 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1859
1860 player_response = None
1861 if webpage:
1862 player_response = self._extract_yt_initial_variable(
1863 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1864 video_id, 'initial player response')
f4f751af 1865
1866 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1867 if not player_response:
1868 player_response = self._call_api(
f4f751af 1869 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1870
1871 playability_status = player_response.get('playabilityStatus') or {}
1872 if playability_status.get('reason') == 'Sign in to confirm your age':
1873 pr = self._parse_json(try_get(compat_parse_qs(
1874 self._download_webpage(
1875 base_url + 'get_video_info', video_id,
1876 'Refetching age-gated info webpage',
1877 'unable to download video info webpage', query={
1878 'video_id': video_id,
7c60c33e 1879 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1880 }, fatal=False)),
1881 lambda x: x['player_response'][0],
1882 compat_str) or '{}', video_id)
1883 if pr:
1884 player_response = pr
1885
1886 trailer_video_id = try_get(
1887 playability_status,
1888 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1889 compat_str)
1890 if trailer_video_id:
1891 return self.url_result(
1892 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1893
545cc85d 1894 def get_text(x):
1895 if not x:
c2d125d9 1896 return
f7ad7160 1897 text = x.get('simpleText')
1898 if text and isinstance(text, compat_str):
1899 return text
1900 runs = x.get('runs')
1901 if not isinstance(runs, list):
1902 return
1903 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
15be3eb5 1904
545cc85d 1905 search_meta = (
1906 lambda x: self._html_search_meta(x, webpage, default=None)) \
1907 if webpage else lambda x: None
dbdaaa23 1908
545cc85d 1909 video_details = player_response.get('videoDetails') or {}
37357d21 1910 microformat = try_get(
545cc85d 1911 player_response,
1912 lambda x: x['microformat']['playerMicroformatRenderer'],
1913 dict) or {}
1914 video_title = video_details.get('title') \
1915 or get_text(microformat.get('title')) \
1916 or search_meta(['og:title', 'twitter:title', 'title'])
1917 video_description = video_details.get('shortDescription')
cf7e015f 1918
8fe10494 1919 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1920 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1921 multifeed_metadata_list = try_get(
1922 player_response,
1923 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1924 compat_str)
8fe10494
S
1925 if multifeed_metadata_list:
1926 entries = []
1927 feed_ids = []
1928 for feed in multifeed_metadata_list.split(','):
1929 # Unquote should take place before split on comma (,) since textual
1930 # fields may contain comma as well (see
067aa17e 1931 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1932 feed_data = compat_parse_qs(
1933 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1934
1935 def feed_entry(name):
545cc85d 1936 return try_get(
1937 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1938
1939 feed_id = feed_entry('id')
1940 if not feed_id:
1941 continue
1942 feed_title = feed_entry('title')
1943 title = video_title
1944 if feed_title:
1945 title += ' (%s)' % feed_title
8fe10494
S
1946 entries.append({
1947 '_type': 'url_transparent',
1948 'ie_key': 'Youtube',
1949 'url': smuggle_url(
545cc85d 1950 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1951 {'force_singlefeed': True}),
6b09401b 1952 'title': title,
8fe10494 1953 })
6b09401b 1954 feed_ids.append(feed_id)
8fe10494
S
1955 self.to_screen(
1956 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1957 % (', '.join(feed_ids), video_id))
545cc85d 1958 return self.playlist_result(
1959 entries, video_id, video_title, video_description)
8fe10494
S
1960 else:
1961 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1962
545cc85d 1963 formats = []
1964 itags = []
cc2db878 1965 itag_qualities = {}
545cc85d 1966 player_url = None
dca3ff4a 1967 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
545cc85d 1968 streaming_data = player_response.get('streamingData') or {}
1969 streaming_formats = streaming_data.get('formats') or []
1970 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1971 for fmt in streaming_formats:
1972 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1973 continue
321bf820 1974
cc2db878 1975 itag = str_or_none(fmt.get('itag'))
1976 quality = fmt.get('quality')
1977 if itag and quality:
1978 itag_qualities[itag] = quality
1979 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1980 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1981 # number of fragment that would subsequently requested with (`&sq=N`)
1982 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1983 continue
1984
545cc85d 1985 fmt_url = fmt.get('url')
1986 if not fmt_url:
1987 sc = compat_parse_qs(fmt.get('signatureCipher'))
1988 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1989 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1990 if not (sc and fmt_url and encrypted_sig):
1991 continue
1992 if not player_url:
1993 if not webpage:
1994 continue
1995 player_url = self._search_regex(
1996 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1997 webpage, 'player URL', fatal=False)
1998 if not player_url:
201e9eaa 1999 continue
545cc85d 2000 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2001 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2002 fmt_url += '&' + sp + '=' + signature
2003
545cc85d 2004 if itag:
2005 itags.append(itag)
cc2db878 2006 tbr = float_or_none(
2007 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
0fb983f6 2008 audio_track = fmt.get('audioTrack') or {}
545cc85d 2009 dct = {
2010 'asr': int_or_none(fmt.get('audioSampleRate')),
2011 'filesize': int_or_none(fmt.get('contentLength')),
2012 'format_id': itag,
0fb983f6 2013 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2014 'fps': int_or_none(fmt.get('fps')),
2015 'height': int_or_none(fmt.get('height')),
dca3ff4a 2016 'quality': q(quality),
cc2db878 2017 'tbr': tbr,
545cc85d 2018 'url': fmt_url,
2019 'width': fmt.get('width'),
0fb983f6 2020 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2021 }
2022 mimetype = fmt.get('mimeType')
2023 if mimetype:
2024 mobj = re.match(
2025 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2026 if mobj:
2027 dct['ext'] = mimetype2ext(mobj.group(1))
2028 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2029 no_audio = dct.get('acodec') == 'none'
2030 no_video = dct.get('vcodec') == 'none'
2031 if no_audio:
2032 dct['vbr'] = tbr
2033 if no_video:
2034 dct['abr'] = tbr
2035 if no_audio or no_video:
545cc85d 2036 dct['downloader_options'] = {
2037 # Youtube throttles chunks >~10M
2038 'http_chunk_size': 10485760,
bf1317d2 2039 }
7c60c33e 2040 if dct.get('ext'):
2041 dct['container'] = dct['ext'] + '_dash'
545cc85d 2042 formats.append(dct)
2043
2044 hls_manifest_url = streaming_data.get('hlsManifestUrl')
2045 if hls_manifest_url:
2046 for f in self._extract_m3u8_formats(
2047 hls_manifest_url, video_id, 'mp4', fatal=False):
2048 itag = self._search_regex(
2049 r'/itag/(\d+)', f['url'], 'itag', default=None)
2050 if itag:
2051 f['format_id'] = itag
2052 formats.append(f)
2053
1418a043 2054 if self._downloader.params.get('youtube_include_dash_manifest', True):
545cc85d 2055 dash_manifest_url = streaming_data.get('dashManifestUrl')
2056 if dash_manifest_url:
545cc85d 2057 for f in self._extract_mpd_formats(
2058 dash_manifest_url, video_id, fatal=False):
cc2db878 2059 itag = f['format_id']
2060 if itag in itags:
2061 continue
dca3ff4a 2062 if itag in itag_qualities:
2063 # Not actually usefull since the sorting is already done with "quality,res,fps,codec"
2064 # but kept to maintain feature parity (and code similarity) with youtube-dl
2065 # Remove if this causes any issues with sorting in future
2066 f['quality'] = q(itag_qualities[itag])
545cc85d 2067 filesize = int_or_none(self._search_regex(
2068 r'/clen/(\d+)', f.get('fragment_base_url')
2069 or f['url'], 'file size', default=None))
2070 if filesize:
2071 f['filesize'] = filesize
cc2db878 2072 formats.append(f)
bf1317d2 2073
545cc85d 2074 if not formats:
63ad4d43 2075 if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2076 self.raise_no_formats(
545cc85d 2077 'This video is DRM protected.', expected=True)
2078 pemr = try_get(
2079 playability_status,
2080 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2081 dict) or {}
2082 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2083 subreason = pemr.get('subreason')
2084 if subreason:
2085 subreason = clean_html(get_text(subreason))
2086 if subreason == 'The uploader has not made this video available in your country.':
2087 countries = microformat.get('availableCountries')
2088 if not countries:
2089 regions_allowed = search_meta('regionsAllowed')
2090 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2091 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2092 reason += '\n' + subreason
2093 if reason:
b7da73eb 2094 self.raise_no_formats(reason, expected=True)
bf1317d2 2095
545cc85d 2096 self._sort_formats(formats)
bf1317d2 2097
545cc85d 2098 keywords = video_details.get('keywords') or []
2099 if not keywords and webpage:
2100 keywords = [
2101 unescapeHTML(m.group('content'))
2102 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2103 for keyword in keywords:
2104 if keyword.startswith('yt:stretch='):
201c1459 2105 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2106 if mobj:
2107 # NB: float is intentional for forcing float division
2108 w, h = (float(v) for v in mobj.groups())
2109 if w > 0 and h > 0:
2110 ratio = w / h
2111 for f in formats:
2112 if f.get('vcodec') != 'none':
2113 f['stretched_ratio'] = ratio
2114 break
6449cd80 2115
545cc85d 2116 thumbnails = []
2117 for container in (video_details, microformat):
2118 for thumbnail in (try_get(
2119 container,
2120 lambda x: x['thumbnail']['thumbnails'], list) or []):
2121 thumbnail_url = thumbnail.get('url')
2122 if not thumbnail_url:
bf1317d2 2123 continue
1988fab7 2124 # Sometimes youtube gives a wrong thumbnail URL. See:
2125 # https://github.com/yt-dlp/yt-dlp/issues/233
2126 # https://github.com/ytdl-org/youtube-dl/issues/28023
2127 if 'maxresdefault' in thumbnail_url:
2128 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2129 thumbnails.append({
2130 'height': int_or_none(thumbnail.get('height')),
2131 'url': thumbnail_url,
2132 'width': int_or_none(thumbnail.get('width')),
2133 })
2134 if thumbnails:
2135 break
a6211d23 2136 else:
545cc85d 2137 thumbnail = search_meta(['og:image', 'twitter:image'])
2138 if thumbnail:
2139 thumbnails = [{'url': thumbnail}]
2140
2141 category = microformat.get('category') or search_meta('genre')
2142 channel_id = video_details.get('channelId') \
2143 or microformat.get('externalChannelId') \
2144 or search_meta('channelId')
2145 duration = int_or_none(
2146 video_details.get('lengthSeconds')
2147 or microformat.get('lengthSeconds')) \
2148 or parse_duration(search_meta('duration'))
2149 is_live = video_details.get('isLive')
2150 owner_profile_url = microformat.get('ownerProfileUrl')
2151
2152 info = {
2153 'id': video_id,
2154 'title': self._live_title(video_title) if is_live else video_title,
2155 'formats': formats,
2156 'thumbnails': thumbnails,
2157 'description': video_description,
2158 'upload_date': unified_strdate(
2159 microformat.get('uploadDate')
2160 or search_meta('uploadDate')),
2161 'uploader': video_details['author'],
2162 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2163 'uploader_url': owner_profile_url,
2164 'channel_id': channel_id,
2165 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2166 'duration': duration,
2167 'view_count': int_or_none(
2168 video_details.get('viewCount')
2169 or microformat.get('viewCount')
2170 or search_meta('interactionCount')),
2171 'average_rating': float_or_none(video_details.get('averageRating')),
2172 'age_limit': 18 if (
2173 microformat.get('isFamilySafe') is False
2174 or search_meta('isFamilyFriendly') == 'false'
2175 or search_meta('og:restrictions:age') == '18+') else 0,
2176 'webpage_url': webpage_url,
2177 'categories': [category] if category else None,
2178 'tags': keywords,
2179 'is_live': is_live,
2180 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2181 'was_live': video_details.get('isLiveContent'),
545cc85d 2182 }
b477fc13 2183
545cc85d 2184 pctr = try_get(
2185 player_response,
2186 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2187 subtitles = {}
2188 if pctr:
2189 def process_language(container, base_url, lang_code, query):
120916da 2190 lang_subs = container.setdefault(lang_code, [])
545cc85d 2191 for fmt in self._SUBTITLE_FORMATS:
2192 query.update({
2193 'fmt': fmt,
2194 })
2195 lang_subs.append({
2196 'ext': fmt,
2197 'url': update_url_query(base_url, query),
2198 })
7e72694b 2199
545cc85d 2200 for caption_track in (pctr.get('captionTracks') or []):
2201 base_url = caption_track.get('baseUrl')
2202 if not base_url:
2203 continue
2204 if caption_track.get('kind') != 'asr':
120916da 2205 lang_code = (
2206 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2207 or caption_track.get('languageCode'))
545cc85d 2208 if not lang_code:
2209 continue
2210 process_language(
2211 subtitles, base_url, lang_code, {})
2212 continue
2213 automatic_captions = {}
2214 for translation_language in (pctr.get('translationLanguages') or []):
2215 translation_language_code = translation_language.get('languageCode')
2216 if not translation_language_code:
2217 continue
2218 process_language(
2219 automatic_captions, base_url, translation_language_code,
2220 {'tlang': translation_language_code})
2221 info['automatic_captions'] = automatic_captions
2222 info['subtitles'] = subtitles
7e72694b 2223
545cc85d 2224 parsed_url = compat_urllib_parse_urlparse(url)
2225 for component in [parsed_url.fragment, parsed_url.query]:
2226 query = compat_parse_qs(component)
2227 for k, v in query.items():
2228 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2229 d_k += '_time'
2230 if d_k not in info and k in s_ks:
2231 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2232
2233 # Youtube Music Auto-generated description
822b9d9c 2234 if video_description:
38d70284 2235 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2236 if mobj:
822b9d9c
RA
2237 release_year = mobj.group('release_year')
2238 release_date = mobj.group('release_date')
2239 if release_date:
2240 release_date = release_date.replace('-', '')
2241 if not release_year:
545cc85d 2242 release_year = release_date[:4]
2243 info.update({
2244 'album': mobj.group('album'.strip()),
2245 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2246 'track': mobj.group('track').strip(),
2247 'release_date': release_date,
cc2db878 2248 'release_year': int_or_none(release_year),
545cc85d 2249 })
7e72694b 2250
545cc85d 2251 initial_data = None
2252 if webpage:
2253 initial_data = self._extract_yt_initial_variable(
2254 webpage, self._YT_INITIAL_DATA_RE, video_id,
2255 'yt initial data')
2256 if not initial_data:
2257 initial_data = self._call_api(
f4f751af 2258 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2259
2260 if not is_live:
2261 try:
2262 # This will error if there is no livechat
2263 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2264 info['subtitles']['live_chat'] = [{
394dcd44 2265 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2266 'video_id': video_id,
2267 'ext': 'json',
2268 'protocol': 'youtube_live_chat_replay',
2269 }]
2270 except (KeyError, IndexError, TypeError):
2271 pass
2272
2273 if initial_data:
2274 chapters = self._extract_chapters_from_json(
2275 initial_data, video_id, duration)
2276 if not chapters:
2277 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2278 contents = try_get(
2279 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2280 list)
2281 if not contents:
2282 continue
2283
2284 def chapter_time(mmlir):
2285 return parse_duration(
2286 get_text(mmlir.get('timeDescription')))
2287
2288 chapters = []
2289 for next_num, content in enumerate(contents, start=1):
2290 mmlir = content.get('macroMarkersListItemRenderer') or {}
2291 start_time = chapter_time(mmlir)
2292 end_time = chapter_time(try_get(
2293 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2294 if next_num < len(contents) else duration
2295 if start_time is None or end_time is None:
2296 continue
2297 chapters.append({
2298 'start_time': start_time,
2299 'end_time': end_time,
2300 'title': get_text(mmlir.get('title')),
2301 })
2302 if chapters:
2303 break
2304 if chapters:
2305 info['chapters'] = chapters
2306
2307 contents = try_get(
2308 initial_data,
2309 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2310 list) or []
2311 for content in contents:
2312 vpir = content.get('videoPrimaryInfoRenderer')
2313 if vpir:
2314 stl = vpir.get('superTitleLink')
2315 if stl:
2316 stl = get_text(stl)
2317 if try_get(
2318 vpir,
2319 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2320 info['location'] = stl
2321 else:
2322 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2323 if mobj:
2324 info.update({
2325 'series': mobj.group(1),
2326 'season_number': int(mobj.group(2)),
2327 'episode_number': int(mobj.group(3)),
2328 })
2329 for tlb in (try_get(
2330 vpir,
2331 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2332 list) or []):
2333 tbr = tlb.get('toggleButtonRenderer') or {}
2334 for getter, regex in [(
2335 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2336 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2337 lambda x: x['accessibility'],
2338 lambda x: x['accessibilityData']['accessibilityData'],
2339 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2340 label = (try_get(tbr, getter, dict) or {}).get('label')
2341 if label:
2342 mobj = re.match(regex, label)
2343 if mobj:
2344 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2345 break
2346 sbr_tooltip = try_get(
2347 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2348 if sbr_tooltip:
2349 like_count, dislike_count = sbr_tooltip.split(' / ')
2350 info.update({
2351 'like_count': str_to_int(like_count),
2352 'dislike_count': str_to_int(dislike_count),
2353 })
2354 vsir = content.get('videoSecondaryInfoRenderer')
2355 if vsir:
2356 info['channel'] = get_text(try_get(
2357 vsir,
2358 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2359 dict))
545cc85d 2360 rows = try_get(
2361 vsir,
2362 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2363 list) or []
2364 multiple_songs = False
2365 for row in rows:
2366 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2367 multiple_songs = True
2368 break
2369 for row in rows:
2370 mrr = row.get('metadataRowRenderer') or {}
2371 mrr_title = mrr.get('title')
2372 if not mrr_title:
2373 continue
2374 mrr_title = get_text(mrr['title'])
2375 mrr_contents_text = get_text(mrr['contents'][0])
2376 if mrr_title == 'License':
2377 info['license'] = mrr_contents_text
2378 elif not multiple_songs:
2379 if mrr_title == 'Album':
2380 info['album'] = mrr_contents_text
2381 elif mrr_title == 'Artist':
2382 info['artist'] = mrr_contents_text
2383 elif mrr_title == 'Song':
2384 info['track'] = mrr_contents_text
2385
2386 fallbacks = {
2387 'channel': 'uploader',
2388 'channel_id': 'uploader_id',
2389 'channel_url': 'uploader_url',
2390 }
2391 for to, frm in fallbacks.items():
2392 if not info.get(to):
2393 info[to] = info.get(frm)
2394
2395 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2396 v = info.get(s_k)
2397 if v:
2398 info[d_k] = v
b84071c0 2399
c224251a
M
2400 is_private = bool_or_none(video_details.get('isPrivate'))
2401 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2402 is_membersonly = None
b28f8d24 2403 is_premium = None
c224251a
M
2404 if initial_data and is_private is not None:
2405 is_membersonly = False
b28f8d24 2406 is_premium = False
c224251a
M
2407 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2408 for content in contents or []:
2409 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2410 for badge in badges or []:
2411 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2412 if label.lower() == 'members only':
2413 is_membersonly = True
2414 break
b28f8d24
M
2415 elif label.lower() == 'premium':
2416 is_premium = True
2417 break
2418 if is_membersonly or is_premium:
c224251a
M
2419 break
2420
2421 # TODO: Add this for playlists
2422 info['availability'] = self._availability(
2423 is_private=is_private,
b28f8d24 2424 needs_premium=is_premium,
c224251a
M
2425 needs_subscription=is_membersonly,
2426 needs_auth=info['age_limit'] >= 18,
2427 is_unlisted=None if is_private is None else is_unlisted)
2428
06167fbb 2429 # get xsrf for annotations or comments
2430 get_annotations = self._downloader.params.get('writeannotations', False)
2431 get_comments = self._downloader.params.get('getcomments', False)
2432 if get_annotations or get_comments:
29f7c58a 2433 xsrf_token = None
545cc85d 2434 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2435 if ytcfg:
2436 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2437 if not xsrf_token:
2438 xsrf_token = self._search_regex(
2439 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2440 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2441
2442 # annotations
06167fbb 2443 if get_annotations:
64b6a4e9
RA
2444 invideo_url = try_get(
2445 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2446 if xsrf_token and invideo_url:
29f7c58a 2447 xsrf_field_name = None
2448 if ytcfg:
2449 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2450 if not xsrf_field_name:
2451 xsrf_field_name = self._search_regex(
2452 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2453 webpage, 'xsrf field name',
29f7c58a 2454 group='xsrf_field_name', default='session_token')
8a784c74 2455 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2456 self._proto_relative_url(invideo_url),
2457 video_id, note='Downloading annotations',
2458 errnote='Unable to download video annotations', fatal=False,
2459 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2460
277d6ff5 2461 if get_comments:
a1c5d2ca 2462 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2463
545cc85d 2464 self.mark_watched(video_id, player_response)
d77ab8e2 2465
545cc85d 2466 return info
c5e8d7af 2467
5f6a1245 2468
8bdd16b4 2469class YoutubeTabIE(YoutubeBaseInfoExtractor):
2470 IE_DESC = 'YouTube.com tab'
70d5c17b 2471 _VALID_URL = r'''(?x)
2472 https?://
2473 (?:\w+\.)?
2474 (?:
2475 youtube(?:kids)?\.com|
2476 invidio\.us
2477 )/
2478 (?:
2479 (?:channel|c|user)/|
2480 (?P<not_channel>
9ba5705a 2481 feed/|hashtag/|
70d5c17b 2482 (?:playlist|watch)\?.*?\blist=
2483 )|
29f7c58a 2484 (?!(?:%s)\b) # Direct URLs
70d5c17b 2485 )
2486 (?P<id>[^/?\#&]+)
2487 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2488 IE_NAME = 'youtube:tab'
2489
81127aa5 2490 _TESTS = [{
8bdd16b4 2491 # playlists, multipage
2492 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2493 'playlist_mincount': 94,
2494 'info_dict': {
2495 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2496 'title': 'Игорь Клейнер - Playlists',
2497 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2498 'uploader': 'Игорь Клейнер',
2499 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2500 },
2501 }, {
2502 # playlists, multipage, different order
2503 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2504 'playlist_mincount': 94,
2505 'info_dict': {
2506 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2507 'title': 'Игорь Клейнер - Playlists',
2508 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2509 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2510 'uploader': 'Игорь Клейнер',
8bdd16b4 2511 },
201c1459 2512 }, {
2513 # playlists, series
2514 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2515 'playlist_mincount': 5,
2516 'info_dict': {
2517 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2518 'title': '3Blue1Brown - Playlists',
2519 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2520 },
8bdd16b4 2521 }, {
2522 # playlists, singlepage
2523 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2524 'playlist_mincount': 4,
2525 'info_dict': {
2526 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2527 'title': 'ThirstForScience - Playlists',
2528 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2529 'uploader': 'ThirstForScience',
2530 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2531 }
2532 }, {
2533 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2534 'only_matching': True,
2535 }, {
2536 # basic, single video playlist
0e30a7b9 2537 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2538 'info_dict': {
0e30a7b9 2539 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2540 'uploader': 'Sergey M.',
2541 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2542 'title': 'youtube-dl public playlist',
81127aa5 2543 },
0e30a7b9 2544 'playlist_count': 1,
9291475f 2545 }, {
8bdd16b4 2546 # empty playlist
0e30a7b9 2547 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2548 'info_dict': {
0e30a7b9 2549 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2550 'uploader': 'Sergey M.',
2551 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2552 'title': 'youtube-dl empty playlist',
9291475f
PH
2553 },
2554 'playlist_count': 0,
2555 }, {
8bdd16b4 2556 # Home tab
2557 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2558 'info_dict': {
8bdd16b4 2559 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2560 'title': 'lex will - Home',
2561 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2562 'uploader': 'lex will',
2563 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2564 },
8bdd16b4 2565 'playlist_mincount': 2,
9291475f 2566 }, {
8bdd16b4 2567 # Videos tab
2568 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2569 'info_dict': {
8bdd16b4 2570 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2571 'title': 'lex will - Videos',
2572 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2573 'uploader': 'lex will',
2574 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2575 },
8bdd16b4 2576 'playlist_mincount': 975,
9291475f 2577 }, {
8bdd16b4 2578 # Videos tab, sorted by popular
2579 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2580 'info_dict': {
8bdd16b4 2581 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2582 'title': 'lex will - Videos',
2583 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2584 'uploader': 'lex will',
2585 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2586 },
8bdd16b4 2587 'playlist_mincount': 199,
9291475f 2588 }, {
8bdd16b4 2589 # Playlists tab
2590 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2591 'info_dict': {
8bdd16b4 2592 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2593 'title': 'lex will - Playlists',
2594 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2595 'uploader': 'lex will',
2596 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2597 },
8bdd16b4 2598 'playlist_mincount': 17,
ac7553d0 2599 }, {
8bdd16b4 2600 # Community tab
2601 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2602 'info_dict': {
8bdd16b4 2603 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2604 'title': 'lex will - Community',
2605 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2606 'uploader': 'lex will',
2607 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2608 },
2609 'playlist_mincount': 18,
87dadd45 2610 }, {
8bdd16b4 2611 # Channels tab
2612 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2613 'info_dict': {
8bdd16b4 2614 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2615 'title': 'lex will - Channels',
2616 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2617 'uploader': 'lex will',
2618 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2619 },
deaec5af 2620 'playlist_mincount': 12,
6b08cdf6 2621 }, {
a0566bbf 2622 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2623 'only_matching': True,
2624 }, {
a0566bbf 2625 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2626 'only_matching': True,
2627 }, {
a0566bbf 2628 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2629 'only_matching': True,
2630 }, {
2631 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2632 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2633 'info_dict': {
2634 'title': '29C3: Not my department',
2635 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2636 'uploader': 'Christiaan008',
2637 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2638 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2639 },
2640 'playlist_count': 96,
2641 }, {
2642 'note': 'Large playlist',
2643 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2644 'info_dict': {
8bdd16b4 2645 'title': 'Uploads from Cauchemar',
2646 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2647 'uploader': 'Cauchemar',
2648 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2649 },
8bdd16b4 2650 'playlist_mincount': 1123,
2651 }, {
2652 # even larger playlist, 8832 videos
2653 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2654 'only_matching': True,
4b7df0d3
JMF
2655 }, {
2656 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2657 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2658 'info_dict': {
acf757f4
PH
2659 'title': 'Uploads from Interstellar Movie',
2660 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2661 'uploader': 'Interstellar Movie',
8bdd16b4 2662 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2663 },
481cc733 2664 'playlist_mincount': 21,
358de58c 2665 }, {
2666 'note': 'Playlist with "show unavailable videos" button',
2667 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2668 'info_dict': {
2669 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2670 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2671 'uploader': 'Phim Siêu Nhân Nhật Bản',
2672 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2673 },
2674 'playlist_mincount': 1400,
2675 'expected_warnings': [
2676 'YouTube said: INFO - Unavailable videos are hidden',
2677 ]
5d342002 2678 }, {
2679 'note': 'Playlist with unavailable videos in a later page',
2680 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2681 'info_dict': {
2682 'title': 'Uploads from BlankTV',
2683 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2684 'uploader': 'BlankTV',
2685 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2686 },
2687 'playlist_mincount': 20000,
8bdd16b4 2688 }, {
2689 # https://github.com/ytdl-org/youtube-dl/issues/21844
2690 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2691 'info_dict': {
2692 'title': 'Data Analysis with Dr Mike Pound',
2693 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2694 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2695 'uploader': 'Computerphile',
deaec5af 2696 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2697 },
2698 'playlist_mincount': 11,
2699 }, {
a0566bbf 2700 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2701 'only_matching': True,
dacb3a86
S
2702 }, {
2703 # Playlist URL that does not actually serve a playlist
2704 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2705 'info_dict': {
2706 'id': 'FqZTN594JQw',
2707 'ext': 'webm',
2708 'title': "Smiley's People 01 detective, Adventure Series, Action",
2709 'uploader': 'STREEM',
2710 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2711 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2712 'upload_date': '20150526',
2713 'license': 'Standard YouTube License',
2714 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2715 'categories': ['People & Blogs'],
2716 'tags': list,
dbdaaa23 2717 'view_count': int,
dacb3a86
S
2718 'like_count': int,
2719 'dislike_count': int,
2720 },
2721 'params': {
2722 'skip_download': True,
2723 },
13a75688 2724 'skip': 'This video is not available.',
dacb3a86 2725 'add_ie': [YoutubeIE.ie_key()],
481cc733 2726 }, {
8bdd16b4 2727 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2728 'only_matching': True,
66b48727 2729 }, {
8bdd16b4 2730 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2731 'only_matching': True,
a0566bbf 2732 }, {
2733 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2734 'info_dict': {
2735 'id': '9Auq9mYxFEE',
2736 'ext': 'mp4',
deaec5af 2737 'title': compat_str,
a0566bbf 2738 'uploader': 'Sky News',
2739 'uploader_id': 'skynews',
2740 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2741 'upload_date': '20191102',
deaec5af 2742 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2743 'categories': ['News & Politics'],
2744 'tags': list,
2745 'like_count': int,
2746 'dislike_count': int,
2747 },
2748 'params': {
2749 'skip_download': True,
2750 },
2751 }, {
2752 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2753 'info_dict': {
2754 'id': 'a48o2S1cPoo',
2755 'ext': 'mp4',
2756 'title': 'The Young Turks - Live Main Show',
2757 'uploader': 'The Young Turks',
2758 'uploader_id': 'TheYoungTurks',
2759 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2760 'upload_date': '20150715',
2761 'license': 'Standard YouTube License',
2762 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2763 'categories': ['News & Politics'],
2764 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2765 'like_count': int,
2766 'dislike_count': int,
2767 },
2768 'params': {
2769 'skip_download': True,
2770 },
2771 'only_matching': True,
2772 }, {
2773 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2774 'only_matching': True,
2775 }, {
2776 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2777 'only_matching': True,
3d3dddc9 2778 }, {
2779 'url': 'https://www.youtube.com/feed/trending',
2780 'only_matching': True,
2781 }, {
2782 # needs auth
2783 'url': 'https://www.youtube.com/feed/library',
2784 'only_matching': True,
2785 }, {
2786 # needs auth
2787 'url': 'https://www.youtube.com/feed/history',
2788 'only_matching': True,
2789 }, {
2790 # needs auth
2791 'url': 'https://www.youtube.com/feed/subscriptions',
2792 'only_matching': True,
2793 }, {
2794 # needs auth
2795 'url': 'https://www.youtube.com/feed/watch_later',
2796 'only_matching': True,
2797 }, {
2798 # no longer available?
2799 'url': 'https://www.youtube.com/feed/recommended',
2800 'only_matching': True,
29f7c58a 2801 }, {
2802 # inline playlist with not always working continuations
2803 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2804 'only_matching': True,
2805 }, {
2806 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2807 'only_matching': True,
2808 }, {
2809 'url': 'https://www.youtube.com/course',
2810 'only_matching': True,
2811 }, {
2812 'url': 'https://www.youtube.com/zsecurity',
2813 'only_matching': True,
2814 }, {
2815 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2816 'only_matching': True,
2817 }, {
2818 'url': 'https://www.youtube.com/TheYoungTurks/live',
2819 'only_matching': True,
39ed931e 2820 }, {
2821 'url': 'https://www.youtube.com/hashtag/cctv9',
2822 'info_dict': {
2823 'id': 'cctv9',
2824 'title': '#cctv9',
2825 },
2826 'playlist_mincount': 350,
201c1459 2827 }, {
2828 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2829 'only_matching': True,
29f7c58a 2830 }]
2831
2832 @classmethod
def suitable(cls, url):
    """Return whether this extractor should handle ``url``.

    Any URL that ``YoutubeIE.suitable`` accepts is rejected here; every
    other URL is decided by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2836
2837 def _extract_channel_id(self, webpage):
2838 channel_id = self._html_search_meta(
2839 'channelId', webpage, 'channel id', default=None)
2840 if channel_id:
2841 return channel_id
2842 channel_url = self._html_search_meta(
2843 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2844 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2845 'twitter:app:url:googleplay'), webpage, 'channel url')
2846 return self._search_regex(
2847 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2848 channel_url, 'channel id')
15f6397c 2849
8bdd16b4 2850 @staticmethod
cd7c66cf 2851 def _extract_basic_item_renderer(item):
2852 # Modified from _extract_grid_item_renderer
201c1459 2853 known_basic_renderers = (
2854 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 2855 )
2856 for key, renderer in item.items():
201c1459 2857 if not isinstance(renderer, dict):
cd7c66cf 2858 continue
201c1459 2859 elif key in known_basic_renderers:
2860 return renderer
2861 elif key.startswith('grid') and key.endswith('Renderer'):
2862 return renderer
8bdd16b4 2863
def _grid_entries(self, grid_renderer):
    """Yield a result for every recognised item of ``grid_renderer['items']``.

    Each item is reduced to its basic renderer and then classified, in this
    order: playlist (``playlistId``), video (``videoId``), channel
    (``channelId``) and finally a generic navigation endpoint URL that is
    handed to the first of YoutubeTabIE / YoutubePlaylistIE / YoutubeIE
    that declares itself suitable.  Items matching none of these are
    silently dropped.
    """
    title_getters = (
        lambda x: x['title']['runs'][0]['text'],
        lambda x: x['title']['simpleText'],
    )
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = try_get(renderer, title_getters, compat_str)

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            # video
            yield self._extract_video(renderer)
        elif channel_id:
            # channel: only the plain-text title form is used here
            channel_title = try_get(
                renderer, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=channel_title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            handler = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE)
                 if ie.suitable(ep_url)),
                None)
            if handler is not None:
                yield self.url_result(
                    ep_url, ie=handler.ie_key(),
                    video_id=handler._match_id(ep_url), video_title=title)
8bdd16b4 2906
3d3dddc9 2907 def _shelf_entries_from_content(self, shelf_renderer):
2908 content = shelf_renderer.get('content')
2909 if not isinstance(content, dict):
8bdd16b4 2910 return
cd7c66cf 2911 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 2912 if renderer:
2913 # TODO: add support for nested playlists so each shelf is processed
2914 # as separate playlist
2915 # TODO: this includes only first N items
2916 for entry in self._grid_entries(renderer):
2917 yield entry
2918 renderer = content.get('horizontalListRenderer')
2919 if renderer:
2920 # TODO
2921 pass
8bdd16b4 2922
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for one shelf.

    When the shelf exposes an endpoint URL, a URL result for it (titled
    with the shelf title) is yielded first; the entries found in the
    shelf content follow.  With ``skip_channels`` set, a shelf whose URL
    contains ``/channels?`` produces nothing at all.
    """
    endpoint = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint)
    # Skip links to other channels.  Checking
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work, hence the URL test.
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return
    if shelf_url:
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 2940
8bdd16b4 2941 def _playlist_entries(self, video_list_renderer):
2942 for content in video_list_renderer['contents']:
2943 if not isinstance(content, dict):
2944 continue
2945 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2946 if not isinstance(renderer, dict):
2947 continue
2948 video_id = renderer.get('videoId')
2949 if not video_id:
2950 continue
2951 yield self._extract_video(renderer)
07aeced6 2952
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer``, if it has an id.

    At most one entry is produced; nothing is yielded when the renderer is
    missing, is not a dict, or carries no ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
2960
8bdd16b4 2961 def _video_entry(self, video_renderer):
2962 video_id = video_renderer.get('videoId')
2963 if video_id:
2964 return self._extract_video(video_renderer)
dacb3a86 2965
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by one community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every YouTube video linked from the post text, except a link to
    the same video that is already attached.  Nothing is yielded when the
    thread has no ``post.backstagePostRenderer`` dict.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        attached_entry = self._extract_video(attached_video)
        if attached_entry:
            yield attached_entry

    # playlist attachment
    playlist_id = try_get(
        post,
        lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'],
        compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # the attached video was already yielded above
        if ep_video_id != video_id:
            yield self.url_result(
                ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 3001
8bdd16b4 3002 def _post_thread_continuation_entries(self, post_thread_continuation):
3003 contents = post_thread_continuation.get('contents')
3004 if not isinstance(contents, list):
3005 return
3006 for content in contents:
3007 renderer = content.get('backstagePostThreadRenderer')
3008 if not isinstance(renderer, dict):
3009 continue
3010 for entry in self._post_thread_entries(renderer):
3011 yield entry
07aeced6 3012
39ed931e 3013 r''' # unused
3014 def _rich_grid_entries(self, contents):
3015 for content in contents:
3016 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3017 if video_renderer:
3018 entry = self._video_entry(video_renderer)
3019 if entry:
3020 yield entry
3021 '''
3022
29f7c58a 3023 @staticmethod
3024 def _build_continuation_query(continuation, ctp=None):
3025 query = {
3026 'ctoken': continuation,
3027 'continuation': continuation,
3028 }
3029 if ctp:
3030 query['itct'] = ctp
3031 return query
3032
8bdd16b4 3033 @staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from ``nextContinuationData``.

    The data is looked up at ``continuations[0].nextContinuationData``.
    ``None`` is returned when that dict or its ``continuation`` token is
    missing or empty.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = data.get('continuation') if data else None
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, data.get('clickTrackingParams'))
c5e8d7af 3044
8bdd16b4 3045 @classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for ``renderer``, or ``None``.

    ``nextContinuationData`` takes precedence.  Otherwise the renderer's
    ``contents`` and ``items`` lists are scanned, in that order, for the
    first ``continuationItemRenderer`` whose endpoint carries a
    ``continuationCommand.token``.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate,
            lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 3067
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of ``tab``, following continuation pages.

    The entries embedded in the tab content are yielded first.  After that,
    continuation pages are requested one at a time until no continuation
    query is left, a request returns nothing, or a response contains no
    renderer this method knows how to handle.

    :param tab: selected tab renderer; its ``content`` must hold a
        ``sectionListRenderer`` or ``richGridRenderer``.
    :param item_id: id used to label the page requests
        (``'<item_id> page <n>'``).
    :param identity_token: forwarded to ``_generate_api_headers``.
    :param account_syncid: forwarded to ``_generate_api_headers``.
    :param ytcfg: configuration forwarded to ``_extract_context``,
        ``_generate_api_headers`` and ``_extract_response``.
    """

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        # Yields the entries below ``parent_renderer`` and records the
        # continuation it finds in ``continuation_list[0]``.
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    continuation_list = [None]  # Python 2 doesnot support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # ``_build_continuation_query`` leaves ``itct`` out when there are
        # no click-tracking params, so it must not be indexed blindly.
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep using the most recent visitor data the server handed out
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response shape: entries under 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset so that only a continuation recorded by
            # ``extract_entries`` during this page is picked up below
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: items appended through
        # 'appendContinuationItemsAction'.  Each handler is paired with the
        # key under which it expects to find the item list.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item decides how the whole list is handled
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3187
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` dict of the first tab flagged as selected.

    Raises ExtractorError when none of ``tabs`` carries a boolean, truthy
    ``tabRenderer.selected`` value.
    """
    for candidate in tabs:
        is_selected = try_get(
            candidate, lambda x: x['tabRenderer']['selected'], bool)
        if not is_selected:
            continue
        return candidate['tabRenderer']
    raise ExtractorError('Unable to find selected tab')
b82f815f 3195
@staticmethod
def _extract_uploader(data):
    """Read uploader name, id and URL from the playlist sidebar of ``data``.

    Every sidebar item that carries a video owner overwrites the values
    collected so far, so the last such item wins. Keys whose value ended
    up as None are left out of the returned dict.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info,
            lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        channel_path = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        info['uploader'] = owner.get('text')
        info['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        info['uploader_url'] = urljoin('https://www.youtube.com/', channel_path)
    return dict((key, value) for key, value in info.items() if value is not None)
8bdd16b4 3218
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build a playlist result for a tabbed (channel/playlist/hashtag) page.

    Metadata is taken from ``channelMetadataRenderer`` when present and
    from ``playlistMetadataRenderer`` otherwise; the entries come from the
    currently selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Only channel pages provide an id here; playlists fall back to item_id below
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        raw_thumbnails = try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
        if not raw_thumbnails:
            raw_thumbnails = try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
        if not raw_thumbnails:
            raw_thumbnails = []

    thumbnails = []
    for thumb in raw_thumbnails:
        if not isinstance(thumb, dict):
            continue
        thumb_url = url_or_none(thumb.get('url'))
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        # Hashtag pages have no metadata renderer; use the header text or the id
        hashtag_title = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag_title or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # channel* fields mirror whatever uploader* fields were finally chosen
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(data)
    ytcfg = self._extract_ytcfg(item_id, webpage)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3290
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, paging through the ``next`` endpoint.

    ``playlist`` is the playlist renderer dict of the first page. Each
    further page is requested starting from the last video seen so far.
    Iteration stops when a page has no videos, when a page adds nothing
    after the last seen video, when the first video shows up again, or
    when the response carries no playlist.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video of the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last panel entry may lack a watch endpoint; fall back to an
        # empty dict so the defaults in the query below are actually used
        # instead of failing on ``None.get``.
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No further playlist data in the response: nothing more to yield
            return
cd7c66cf 3329
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a playlist found on a watch page.

    If the playlist advertises its own URL and that URL differs from
    ``url``, extraction is delegated to YoutubeTabIE for that URL.
    Otherwise the playlist is extracted here as a mix playlist.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    advertised_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, advertised_path)
    if playlist_url and playlist_url != url:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=title)

    mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
    return self.playlist_result(
        mix_entries, playlist_id=playlist_id, playlist_title=title)
c5e8d7af 3347
f3eaa8dd
M
def _extract_alerts(self, data, expected=False):
    """Report the alerts contained in ``data['alerts']``.

    Alerts whose type is ``error`` (case-insensitive) are errors, all
    others are warnings. Every warning and every error except the last is
    reported through ``report_warning``; the last error is raised as an
    ExtractorError with the given ``expected`` flag.
    """

    def _real_extract_alerts():
        """Yield one ``(alert_type, message)`` pair per alert that has text."""
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # Skip malformed entries instead of failing on ``.get``
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                # The text is either a single ``simpleText`` or a list of
                # ``runs``; build the message once so that an alert is
                # never reported twice.
                message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
                for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                    # A run without a string ``text`` contributes nothing
                    message += try_get(run, lambda x: x['text'], compat_str) or ''
                if message:
                    yield alert_type, message

    errors = []
    warnings = []
    for alert_type, alert_message in _real_extract_alerts():
        if alert_type.lower() == 'error':
            errors.append([alert_type, alert_message])
        else:
            warnings.append([alert_type, alert_message])

    # Only the last error is raised; earlier ones are downgraded to warnings
    for alert_type, alert_message in (warnings + errors[:-1]):
        self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if errors:
        raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3378
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Re-request the playlist through the API so that unavailable videos are included.

    The browse id and params are read from the sidebar menu entry labelled
    'show unavailable videos'; if no such entry is found, fixed defaults
    are used. Returns None when ``data`` has no playlist sidebar, otherwise
    whatever the non-fatal API request returns.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return

    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        primary_info = sidebar_item.get('playlistSidebarPrimaryInfoRenderer')
        menu_items = try_get(
            primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if not isinstance(menu_item, dict):
                continue
            nav_item = menu_item.get('menuNavigationItemRenderer')
            label = try_get(
                nav_item, lambda x: x['text']['simpleText'], compat_str)
            if label and label.lower() == 'show unavailable videos':
                endpoint = try_get(
                    nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
                browse_id = endpoint.get('browseId')
                params = endpoint.get('params')
                # Stops scanning this menu only; later sidebar items are still visited
                break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3422
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint ``ep`` with retries and return the JSON response.

    A request is retried (up to the ``extractor_retries`` parameter,
    default 3) on HTTP 500/503/404 and when none of ``check_get_keys`` is
    present in the response. When the attempts are used up, or on any
    other ExtractorError, the error is raised if ``fatal`` is true;
    otherwise a warning is reported and None is returned.
    """
    retryable_http_codes = (500, 503, 404)
    max_retries = self._downloader.params.get('extractor_retries', 3)
    required_keys = [] if check_get_keys is None else check_get_keys

    response = None
    last_error = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, retry_suffix))
        except ExtractorError as e:
            http_error = e.cause if isinstance(e.cause, compat_HTTPError) else None
            if http_error is not None and http_error.code in retryable_http_codes:
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % http_error.code
                if attempt < max_retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        # Youtube may send alerts if there was an issue with the continuation page
        self._extract_alerts(response, expected=False)
        if not required_keys or dict_get(response, required_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if attempt < max_retries:
            continue
        if fatal:
            raise ExtractorError(last_error)
        self.report_warning(last_error)
        return
    return response
3470
def _extract_webpage(self, url, item_id):
    """Download ``url`` and return ``(webpage, data)`` with usable initial data.

    The download is repeated (up to the ``extractor_retries`` parameter,
    default 3) until the initial data has ``contents`` or
    ``currentVideoEndpoint``. Raises ExtractorError once the retries are
    used up. Alerts in the data are reported with ``expected=True``.
    """
    max_retries = self._downloader.params.get('extractor_retries', 3)
    last_error = 'Incomplete yt initial data recieved'
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if attempt:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        webpage = self._download_webpage(
            url, item_id, 'Downloading webpage%s' % retry_suffix)
        data = self._extract_yt_initial_data(item_id, webpage)
        self._extract_alerts(data, expected=True)
        has_payload = data.get('contents') or data.get('currentVideoEndpoint')
        if has_payload:
            break
        if attempt >= max_retries:
            raise ExtractorError(last_error)
    return webpage, data
3491
def _real_extract(self, url):
    """Extract a tab page, a playlist or, as a last resort, a single video.

    The URL is normalised to ``www.youtube.com`` first. Bare channel/user
    URLs are redirected to their ``/videos`` tab unless the
    ``no-youtube-channel-redirect`` compat option is set.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self._downloader.params.get('compat_opts', [])

    # This is not matched in a channel page with a tab selected
    channel_match = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    url_parts = channel_match.groupdict() if channel_match else {}
    redirect_allowed = 'no-youtube-channel-redirect' not in compat_opts
    if url_parts and not url_parts.get('not_channel') and redirect_allowed:
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (url_parts.get('pre'), url_parts.get('post') or '')

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    is_watch_url = (url_parts.get('not_channel') or '').startswith('watch')
    if is_watch_url and not video_id:
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        reloaded = self._reload_with_unavailable_videos(item_id, data, webpage)
        data = reloaded or data

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Neither tabs nor a playlist: fall back to the video the page points at
    current_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str)
    video_id = current_video_id or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')
    self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 3550
c5e8d7af 3551
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids and URLs carrying a ``list=`` parameter and
    # forwards them to YoutubeTabIE as a canonical /playlist URL.
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a ``v`` parameter."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = parse_qs(url).get('v', [None])[0]
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Forward to YoutubeTabIE, keeping the original query string if there is one."""
        playlist_id = self._match_id(url)
        # A bare playlist id has no query string; build one from the id
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3633
3634
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a playlist id; rewritten to a
    # full watch URL and passed on to YoutubeTabIE.
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rebuild the short link as a watch URL with both video and playlist ids."""
        match = re.match(self._VALID_URL, url)
        playlist_id = match.group('playlist_id')
        watch_query = {
            'v': match.group('id'),
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3673
3674
class YoutubeYtUserIE(InfoExtractor):
    # ``ytuser:<name>`` shortcut; resolved to the user's page URL and
    # handed to YoutubeTabIE.
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Map the keyword to ``https://www.youtube.com/user/<name>``."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3688
b05654f0 3689
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ``:ytfav`` shortcut for the liked-videos playlist (list id ``LL``).
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed liked-videos playlist URL."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
3707
3708
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    # Keyword search (``ytsearch``) backed by the ``search`` API endpoint.
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` video results for ``query``, following continuations."""
        payload = {'query': query}
        if self._SEARCH_PARAMS:
            payload['params'] = self._SEARCH_PARAMS
        yielded = 0
        for page_num in itertools.count(1):
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=payload,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                return
            # First page and continuation pages keep their sections in different places
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for section in sections:
                if continuation_token is None:
                    continuation_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or []:
                    if not isinstance(item, dict):
                        continue
                    video = item.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    if not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation_token:
                return
            payload['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
75dff0ee 3778
c9ae7b95 3779
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same as YoutubeSearchIE but sends a fixed ``params`` value with the
    # search request so that the newest videos are listed first.
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
75dff0ee 3785
c9ae7b95 3786
class YoutubeSearchURLIE(YoutubeSearchIE):
    # Handles /results search URLs by reusing the keyword search machinery.
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return the URL pattern of this class as is."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search named by ``search_query``/``q``, passing ``sp`` as search params."""
        url_query = compat_urllib_parse_urlparse(url).query
        qs = compat_parse_qs(url_query)
        search_terms = qs.get('search_query') or qs.get('q')
        query = search_terms[0]
        # Stored on the instance so that _entries picks it up for this search
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3812
3813
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derived from the feed name each subclass provides
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the feed's URL."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3833
3834
class YoutubeWatchLaterIE(InfoExtractor):
    # ``:ytwatchlater`` shortcut for the watch-later playlist (list id ``WL``).
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed watch-later playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3847
3848
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare YouTube home page as well as the ``:ytrec`` keyword.
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3863
1ed5b5c9 3864
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # ``:ytsubs`` keyword for the subscriptions feed.
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3876
1ed5b5c9 3877
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # ``:ythis`` / ``:ythistory`` keyword for the watch-history feed.
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3886
3887
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that end before any video id, which
    # typically happens when an unquoted ``&`` was interpreted by the shell.
    # Extraction always fails with an explanatory, expected error.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The example commands name this program (yt-dlp) so that they can
        # be copied as they are.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
3935
3936
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose ``v`` value is 1 to 10 characters long, i.e.
    # shorter than the 11 characters a complete id has elsewhere in this file.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)