]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube] Add language names
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Return the query component of *url* parsed into a dict of value lists."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self._downloader.params.get('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the hidden login form fields plus *f_req* (JSON-encoded) to *url*.

            Returns the parsed JSON response; the request is non-fatal
            (fatal=False), and callers compare the result against False.
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything preceding the first '[' before JSON parsing
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self.report_warning(message)

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised so the prefix is kept for every message, not only
            # for the 'Invalid password' case
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _initialize_consent(self):
        """Set a 'YES' CONSENT cookie for .youtube.com unless one is already in place.

        Nothing is done when a '__Secure-3PSID' cookie exists or the current
        CONSENT cookie already contains 'YES'. The id of a PENDING consent
        cookie is reused if present, otherwise a random 3-digit id is used.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Set up the consent cookie and, when a downloader is attached, attempt login."""
        self._initialize_consent()
        if self._downloader is None:
            return
        if not self._login():
            return

    _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
    _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """Build the 'SAPISIDHASH <time>_<sha1>' value from the SAPISID cookie.

        Returns None when no SAPISID cookie is available for youtube.com.
        """
        sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        # SHA-1 over "<timestamp> <SAPISID> <origin>"
        sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
        return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None):
        """POST *query* as JSON to the youtubei/v1 endpoint *ep* and return the parsed response.

        @param context  client context to send; extracted defaults are used when falsy
        @param api_key  API key for the 'key' query parameter; the extracted/default
                        key is used when falsy
        @param headers  extra headers merged over the generated API headers
        """
        data = {'context': context or self._extract_context()}
        data.update(query)
        real_headers = self._generate_api_headers()
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def _extract_api_key(self, ytcfg=None):
        """Return INNERTUBE_API_KEY from *ytcfg*, falling back to the built-in key."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY

    def _extract_yt_initial_data(self, video_id, webpage):
        """Locate the ytInitialData object in *webpage* and return it parsed as JSON."""
        return self._parse_json(
            self._search_regex(
                # Prefer the match terminated by a known boundary, then the bare pattern
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return ID_TOKEN from the page's ytcfg, or from a regex over *webpage*; None if absent."""
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(data):
        """
        Extract syncId required to download private playlists of secondary channels
        @param data Either response or ytcfg
        """
        sync_ids = (try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]
        # ytcfg includes channel_syncid if on secondary channel.
        # try_get keeps this safe when data is None or lacks the key.
        return try_get(data, lambda x: x['DELEGATED_SESSION_ID'])

    def _extract_ytcfg(self, video_id, webpage):
        """Return the object passed to ytcfg.set(...) in *webpage* as a dict ({} if unavailable)."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def __extract_client_version(self, ytcfg):
        """Return INNERTUBE_CLIENT_VERSION from *ytcfg*, falling back to the built-in version."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION

    def _extract_context(self, ytcfg=None):
        """Return INNERTUBE_CONTEXT from *ytcfg*, or rebuild a minimal client context."""
        context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
        if context:
            return context

        # Recreate the client context (required)
        client_version = self.__extract_client_version(ytcfg)
        client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
        context = {
            'client': {
                'clientName': client_name,
                'clientVersion': client_version,
            }
        }
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
        """Build the request headers for API calls.

        Optional identity token, account sync id and visitor data are added
        when given; Authorization and X-Origin are added when a SAPISIDHASH
        value can be generated.
        """
        headers = {
            'X-YouTube-Client-Name': '1',
            'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
            headers['X-Goog-AuthUser'] = 0
        if visitor_data:
            headers['x-goog-visitor-id'] = visitor_data
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = 'https://www.youtube.com'
        return headers

    def _extract_video(self, renderer):
        """Convert a video renderer dict into a 'url' result handled by YoutubeIE."""
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is stripped first; only a leading run of digits/commas is used
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
432
0c148415 433
360e1ca5 434class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 435 IE_DESC = 'YouTube.com'
bc2ca1bb 436 _INVIDIOUS_SITES = (
437 # invidious-redirect websites
438 r'(?:www\.)?redirect\.invidious\.io',
439 r'(?:(?:www|dev)\.)?invidio\.us',
440 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
441 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 442 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 443 r'(?:(?:www|au)\.)?ytprivate\.com',
444 r'(?:www\.)?invidious\.namazso\.eu',
445 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 446 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
447 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
448 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
449 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
450 # youtube-dl invidious instances list
451 r'(?:(?:www|no)\.)?invidiou\.sh',
452 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
453 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 454 r'(?:www\.)?invidious\.mastodon\.host',
455 r'(?:www\.)?invidious\.zapashcanon\.fr',
456 r'(?:www\.)?invidious\.kavin\.rocks',
201c1459 457 r'(?:www\.)?invidious\.tinfoil-hat\.net',
458 r'(?:www\.)?invidious\.himiko\.cloud',
459 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 460 r'(?:www\.)?invidious\.tube',
461 r'(?:www\.)?invidiou\.site',
462 r'(?:www\.)?invidious\.site',
463 r'(?:www\.)?invidious\.xyz',
464 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 465 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 466 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 467 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 468 r'(?:www\.)?tube\.poal\.co',
469 r'(?:www\.)?tube\.connect\.cafe',
470 r'(?:www\.)?vid\.wxzm\.sx',
471 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 472 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 473 r'(?:www\.)?yewtu\.be',
474 r'(?:www\.)?yt\.elukerio\.org',
475 r'(?:www\.)?yt\.lelux\.fi',
476 r'(?:www\.)?invidious\.ggc-project\.de',
477 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 478 r'(?:www\.)?ytprivate\.com',
479 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 480 r'(?:www\.)?invidious\.toot\.koeln',
481 r'(?:www\.)?invidious\.fdn\.fr',
482 r'(?:www\.)?watch\.nettohikari\.com',
483 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
484 r'(?:www\.)?qklhadlycap4cnod\.onion',
485 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
486 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
487 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
488 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
489 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
490 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
491 )
cb7dfeea 492 _VALID_URL = r"""(?x)^
c5e8d7af 493 (
edb53e2d 494 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 495 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
496 (?:www\.)?deturl\.com/www\.youtube\.com|
497 (?:www\.)?pwnyoutube\.com|
498 (?:www\.)?hooktube\.com|
499 (?:www\.)?yourepeat\.com|
500 tube\.majestyc\.net|
501 %(invidious)s|
502 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
503 (?:.*?\#/)? # handle anchor (#/) redirect urls
504 (?: # the various things that can precede the ID:
ac7553d0 505 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 506 |(?: # or the v= param in all its forms
f7000f3a 507 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 508 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 509 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
510 v=
511 )
f4b05232 512 ))
cbaed4bb
S
513 |(?:
514 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
515 vid\.plus| # or vid.plus/xxxx
516 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 517 %(invidious)s
cbaed4bb 518 )/
edb53e2d 519 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 520 )
c5e8d7af 521 )? # all until now is optional -> you can pass the naked ID
201c1459 522 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 523 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 524 $""" % {
bc2ca1bb 525 'invidious': '|'.join(_INVIDIOUS_SITES),
526 }
e40c758c 527 _PLAYER_INFO_RE = (
cc2db878 528 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
529 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 530 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 531 )
2c62dc26 532 _formats = {
c2d3cb4c 533 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
534 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
535 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
536 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
537 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
538 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
539 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
540 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 541 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 542 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
543 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
544 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
545 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
546 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
547 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 548 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 549 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
550 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 551
552
553 # 3D videos
c2d3cb4c 554 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
555 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
556 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
557 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 558 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
559 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
560 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 561
96fb5605 562 # Apple HTTP Live Streaming
11f12195 563 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 564 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
565 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
566 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
567 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
568 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 569 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
570 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
571
572 # DASH mp4 video
d23028a8
S
573 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
574 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
575 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
576 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
577 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 578 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
579 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
580 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
581 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
582 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
583 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
584 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 585
f6f1fc92 586 # Dash mp4 audio
d23028a8
S
587 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
588 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
589 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
590 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
591 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
592 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
593 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
594
595 # Dash webm
d23028a8
S
596 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
597 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
598 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
599 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
600 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
601 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
602 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
603 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
604 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
605 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
606 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
607 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
608 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
609 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
610 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 611 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
612 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
613 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
614 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
615 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
616 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
617 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
618
619 # Dash webm audio
d23028a8
S
620 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
621 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 622
0857baad 623 # Dash webm audio with opus inside
d23028a8
S
624 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
625 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
626 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 627
ce6b9a2d
PH
628 # RTMP (unnamed)
629 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
630
631 # av01 video only formats sometimes served with "unknown" codecs
632 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
633 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
634 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
635 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 636 }
29f7c58a 637 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 638
fd5c4aab
S
639 _GEO_BYPASS = False
640
78caa52a 641 IE_NAME = 'youtube'
2eb88d95
PH
642 _TESTS = [
643 {
2d3d2997 644 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
645 'info_dict': {
646 'id': 'BaW_jenozKc',
647 'ext': 'mp4',
3867038a 648 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
649 'uploader': 'Philipp Hagemeister',
650 'uploader_id': 'phihag',
ec85ded8 651 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
652 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
653 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 654 'upload_date': '20121002',
3867038a 655 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 656 'categories': ['Science & Technology'],
3867038a 657 'tags': ['youtube-dl'],
556dbe7f 658 'duration': 10,
dbdaaa23 659 'view_count': int,
3e7c1224
PH
660 'like_count': int,
661 'dislike_count': int,
7c80519c 662 'start_time': 1,
297a564b 663 'end_time': 9,
2eb88d95 664 }
0e853ca4 665 },
fccd3771 666 {
4bc3a23e
PH
667 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
668 'note': 'Embed-only video (#1746)',
669 'info_dict': {
670 'id': 'yZIXLfi8CZQ',
671 'ext': 'mp4',
672 'upload_date': '20120608',
673 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
674 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
675 'uploader': 'SET India',
94bfcd23 676 'uploader_id': 'setindia',
ec85ded8 677 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 678 'age_limit': 18,
545cc85d 679 },
680 'skip': 'Private video',
fccd3771 681 },
11b56058 682 {
8bdd16b4 683 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
684 'note': 'Use the first video ID in the URL',
685 'info_dict': {
686 'id': 'BaW_jenozKc',
687 'ext': 'mp4',
3867038a 688 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
689 'uploader': 'Philipp Hagemeister',
690 'uploader_id': 'phihag',
ec85ded8 691 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 692 'upload_date': '20121002',
3867038a 693 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 694 'categories': ['Science & Technology'],
3867038a 695 'tags': ['youtube-dl'],
556dbe7f 696 'duration': 10,
dbdaaa23 697 'view_count': int,
11b56058
PM
698 'like_count': int,
699 'dislike_count': int,
34a7de29
S
700 },
701 'params': {
702 'skip_download': True,
703 },
11b56058 704 },
dd27fd17 705 {
2d3d2997 706 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
707 'note': '256k DASH audio (format 141) via DASH manifest',
708 'info_dict': {
709 'id': 'a9LDPn-MO4I',
710 'ext': 'm4a',
711 'upload_date': '20121002',
712 'uploader_id': '8KVIDEO',
ec85ded8 713 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
714 'description': '',
715 'uploader': '8KVIDEO',
716 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 717 },
4bc3a23e
PH
718 'params': {
719 'youtube_include_dash_manifest': True,
720 'format': '141',
4919603f 721 },
de3c7fe0 722 'skip': 'format 141 not served anymore',
dd27fd17 723 },
8bdd16b4 724 # DASH manifest with encrypted signature
725 {
726 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
727 'info_dict': {
728 'id': 'IB3lcPjvWLA',
729 'ext': 'm4a',
730 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
731 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
732 'duration': 244,
733 'uploader': 'AfrojackVEVO',
734 'uploader_id': 'AfrojackVEVO',
735 'upload_date': '20131011',
cc2db878 736 'abr': 129.495,
8bdd16b4 737 },
738 'params': {
739 'youtube_include_dash_manifest': True,
740 'format': '141/bestaudio[ext=m4a]',
741 },
742 },
aa79ac0c
PH
743 # Controversy video
744 {
745 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
746 'info_dict': {
747 'id': 'T4XJQO3qol8',
748 'ext': 'mp4',
556dbe7f 749 'duration': 219,
aa79ac0c 750 'upload_date': '20100909',
4fe54c12 751 'uploader': 'Amazing Atheist',
aa79ac0c 752 'uploader_id': 'TheAmazingAtheist',
ec85ded8 753 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 754 'title': 'Burning Everyone\'s Koran',
545cc85d 755 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 756 }
c522adb1 757 },
dd2d55f1 758 # Normal age-gate video (embed allowed)
c522adb1 759 {
2d3d2997 760 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
761 'info_dict': {
762 'id': 'HtVdAasjOgU',
763 'ext': 'mp4',
764 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 765 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 766 'duration': 142,
c522adb1
JMF
767 'uploader': 'The Witcher',
768 'uploader_id': 'WitcherGame',
ec85ded8 769 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 770 'upload_date': '20140605',
34952f09 771 'age_limit': 18,
c522adb1
JMF
772 },
773 },
8bdd16b4 774 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
775 # YouTube Red ad is not captured for creator
776 {
777 'url': '__2ABJjxzNo',
778 'info_dict': {
779 'id': '__2ABJjxzNo',
780 'ext': 'mp4',
781 'duration': 266,
782 'upload_date': '20100430',
783 'uploader_id': 'deadmau5',
784 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 785 'creator': 'deadmau5',
786 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 787 'uploader': 'deadmau5',
788 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 789 'alt_title': 'Some Chords',
8bdd16b4 790 },
791 'expected_warnings': [
792 'DASH manifest missing',
793 ]
794 },
067aa17e 795 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
796 {
797 'url': 'lqQg6PlCWgI',
798 'info_dict': {
799 'id': 'lqQg6PlCWgI',
800 'ext': 'mp4',
556dbe7f 801 'duration': 6085,
90227264 802 'upload_date': '20150827',
cbe2bd91 803 'uploader_id': 'olympic',
ec85ded8 804 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 805 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 806 'uploader': 'Olympic',
cbe2bd91
PH
807 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
808 },
809 'params': {
810 'skip_download': 'requires avconv',
e52a40ab 811 }
cbe2bd91 812 },
6271f1ca
PH
813 # Non-square pixels
814 {
815 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
816 'info_dict': {
817 'id': '_b-2C3KPAM0',
818 'ext': 'mp4',
819 'stretched_ratio': 16 / 9.,
556dbe7f 820 'duration': 85,
6271f1ca
PH
821 'upload_date': '20110310',
822 'uploader_id': 'AllenMeow',
ec85ded8 823 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 824 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 825 'uploader': '孫ᄋᄅ',
6271f1ca
PH
826 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
827 },
06b491eb
S
828 },
829 # url_encoded_fmt_stream_map is empty string
830 {
831 'url': 'qEJwOuvDf7I',
832 'info_dict': {
833 'id': 'qEJwOuvDf7I',
f57b7835 834 'ext': 'webm',
06b491eb
S
835 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
836 'description': '',
837 'upload_date': '20150404',
838 'uploader_id': 'spbelect',
839 'uploader': 'Наблюдатели Петербурга',
840 },
841 'params': {
842 'skip_download': 'requires avconv',
e323cf3f
S
843 },
844 'skip': 'This live event has ended.',
06b491eb 845 },
067aa17e 846 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
847 {
848 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
849 'info_dict': {
850 'id': 'FIl7x6_3R5Y',
eb6793ba 851 'ext': 'webm',
da77d856
S
852 'title': 'md5:7b81415841e02ecd4313668cde88737a',
853 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 854 'duration': 220,
da77d856
S
855 'upload_date': '20150625',
856 'uploader_id': 'dorappi2000',
ec85ded8 857 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 858 'uploader': 'dorappi2000',
eb6793ba 859 'formats': 'mincount:31',
da77d856 860 },
eb6793ba 861 'skip': 'not actual anymore',
2ee8f5d8 862 },
8a1a26ce
YCH
863 # DASH manifest with segment_list
864 {
865 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
866 'md5': '8ce563a1d667b599d21064e982ab9e31',
867 'info_dict': {
868 'id': 'CsmdDsKjzN8',
869 'ext': 'mp4',
17ee98e1 870 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
871 'uploader': 'Airtek',
872 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
873 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
874 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
875 },
876 'params': {
877 'youtube_include_dash_manifest': True,
878 'format': '135', # bestvideo
be49068d
S
879 },
880 'skip': 'This live event has ended.',
2ee8f5d8 881 },
cf7e015f
S
882 {
883 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 884 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 885 'info_dict': {
545cc85d 886 'id': 'jvGDaLqkpTg',
887 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
888 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
889 },
890 'playlist': [{
891 'info_dict': {
545cc85d 892 'id': 'jvGDaLqkpTg',
cf7e015f 893 'ext': 'mp4',
545cc85d 894 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
895 'description': 'md5:e03b909557865076822aa169218d6a5d',
896 'duration': 10643,
897 'upload_date': '20161111',
898 'uploader': 'Team PGP',
899 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
900 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
901 },
902 }, {
903 'info_dict': {
545cc85d 904 'id': '3AKt1R1aDnw',
cf7e015f 905 'ext': 'mp4',
545cc85d 906 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
907 'description': 'md5:e03b909557865076822aa169218d6a5d',
908 'duration': 10991,
909 'upload_date': '20161111',
910 'uploader': 'Team PGP',
911 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
912 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
913 },
914 }, {
915 'info_dict': {
545cc85d 916 'id': 'RtAMM00gpVc',
cf7e015f 917 'ext': 'mp4',
545cc85d 918 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
919 'description': 'md5:e03b909557865076822aa169218d6a5d',
920 'duration': 10995,
921 'upload_date': '20161111',
922 'uploader': 'Team PGP',
923 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
924 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
925 },
926 }, {
927 'info_dict': {
545cc85d 928 'id': '6N2fdlP3C5U',
cf7e015f 929 'ext': 'mp4',
545cc85d 930 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
931 'description': 'md5:e03b909557865076822aa169218d6a5d',
932 'duration': 10990,
933 'upload_date': '20161111',
934 'uploader': 'Team PGP',
935 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
936 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
937 },
938 }],
939 'params': {
940 'skip_download': True,
941 },
cbaed4bb 942 },
f9f49d87 943 {
067aa17e 944 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
945 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
946 'info_dict': {
947 'id': 'gVfLd0zydlo',
948 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
949 },
950 'playlist_count': 2,
be49068d 951 'skip': 'Not multifeed anymore',
f9f49d87 952 },
cbaed4bb 953 {
2d3d2997 954 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 955 'only_matching': True,
0e49d9a6 956 },
6d4fc66b 957 {
2d3d2997 958 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
959 'only_matching': True,
960 },
0e49d9a6 961 {
067aa17e 962 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 963 # Also tests cut-off URL expansion in video description (see
067aa17e
S
964 # https://github.com/ytdl-org/youtube-dl/issues/1892,
965 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
966 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
967 'info_dict': {
968 'id': 'lsguqyKfVQg',
969 'ext': 'mp4',
970 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 971 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 972 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 973 'duration': 133,
0e49d9a6
LL
974 'upload_date': '20151119',
975 'uploader_id': 'IronSoulElf',
ec85ded8 976 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 977 'uploader': 'IronSoulElf',
eb6793ba
S
978 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
979 'track': 'Dark Walk - Position Music',
980 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 981 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
982 },
983 'params': {
984 'skip_download': True,
985 },
986 },
61f92af1 987 {
067aa17e 988 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
989 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
990 'only_matching': True,
991 },
313dfc45
LL
992 {
993 # Video with yt:stretch=17:0
994 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
995 'info_dict': {
996 'id': 'Q39EVAstoRM',
997 'ext': 'mp4',
998 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
999 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1000 'upload_date': '20151107',
1001 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1002 'uploader': 'CH GAMER DROID',
1003 },
1004 'params': {
1005 'skip_download': True,
1006 },
be49068d 1007 'skip': 'This video does not exist.',
313dfc45 1008 },
201c1459 1009 {
1010 # Video with incomplete 'yt:stretch=16:'
1011 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1012 'only_matching': True,
1013 },
7caf9830
S
1014 {
1015 # Video licensed under Creative Commons
1016 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1017 'info_dict': {
1018 'id': 'M4gD1WSo5mA',
1019 'ext': 'mp4',
1020 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1021 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1022 'duration': 721,
7caf9830
S
1023 'upload_date': '20150127',
1024 'uploader_id': 'BerkmanCenter',
ec85ded8 1025 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1026 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1027 'license': 'Creative Commons Attribution license (reuse allowed)',
1028 },
1029 'params': {
1030 'skip_download': True,
1031 },
1032 },
fd050249
S
1033 {
1034 # Channel-like uploader_url
1035 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1036 'info_dict': {
1037 'id': 'eQcmzGIKrzg',
1038 'ext': 'mp4',
1039 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1040 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1041 'duration': 4060,
fd050249 1042 'upload_date': '20151119',
eb6793ba 1043 'uploader': 'Bernie Sanders',
fd050249 1044 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1045 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1046 'license': 'Creative Commons Attribution license (reuse allowed)',
1047 },
1048 'params': {
1049 'skip_download': True,
1050 },
1051 },
040ac686
S
1052 {
1053 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1054 'only_matching': True,
7f29cf54
S
1055 },
1056 {
067aa17e 1057 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1058 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1059 'only_matching': True,
6496ccb4
S
1060 },
1061 {
1062 # Rental video preview
1063 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1064 'info_dict': {
1065 'id': 'uGpuVWrhIzE',
1066 'ext': 'mp4',
1067 'title': 'Piku - Trailer',
1068 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1069 'upload_date': '20150811',
1070 'uploader': 'FlixMatrix',
1071 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1072 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1073 'license': 'Standard YouTube License',
1074 },
1075 'params': {
1076 'skip_download': True,
1077 },
eb6793ba 1078 'skip': 'This video is not available.',
022a5d66 1079 },
12afdc2a
S
1080 {
1081 # YouTube Red video with episode data
1082 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1083 'info_dict': {
1084 'id': 'iqKdEhx-dD4',
1085 'ext': 'mp4',
1086 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1087 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1088 'duration': 2085,
12afdc2a
S
1089 'upload_date': '20170118',
1090 'uploader': 'Vsauce',
1091 'uploader_id': 'Vsauce',
1092 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1093 'series': 'Mind Field',
1094 'season_number': 1,
1095 'episode_number': 1,
1096 },
1097 'params': {
1098 'skip_download': True,
1099 },
1100 'expected_warnings': [
1101 'Skipping DASH manifest',
1102 ],
1103 },
c7121fa7
S
1104 {
1105 # The following content has been identified by the YouTube community
1106 # as inappropriate or offensive to some audiences.
1107 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1108 'info_dict': {
1109 'id': '6SJNVb0GnPI',
1110 'ext': 'mp4',
1111 'title': 'Race Differences in Intelligence',
1112 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1113 'duration': 965,
1114 'upload_date': '20140124',
1115 'uploader': 'New Century Foundation',
1116 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1117 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1118 },
1119 'params': {
1120 'skip_download': True,
1121 },
545cc85d 1122 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1123 },
022a5d66
S
1124 {
1125 # itag 212
1126 'url': '1t24XAntNCY',
1127 'only_matching': True,
fd5c4aab
S
1128 },
1129 {
1130 # geo restricted to JP
1131 'url': 'sJL6WA-aGkQ',
1132 'only_matching': True,
1133 },
cd5a74a2
S
1134 {
1135 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1136 'only_matching': True,
1137 },
bc2ca1bb 1138 {
1139 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1140 'only_matching': True,
1141 },
1142 {
1143 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1144 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1145 'only_matching': True,
1146 },
825cd268
RA
1147 {
1148 # DRM protected
1149 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1150 'only_matching': True,
4fe54c12
S
1151 },
1152 {
1153 # Video with unsupported adaptive stream type formats
1154 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1155 'info_dict': {
1156 'id': 'Z4Vy8R84T1U',
1157 'ext': 'mp4',
1158 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1159 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1160 'duration': 433,
1161 'upload_date': '20130923',
1162 'uploader': 'Amelia Putri Harwita',
1163 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1164 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1165 'formats': 'maxcount:10',
1166 },
1167 'params': {
1168 'skip_download': True,
1169 'youtube_include_dash_manifest': False,
1170 },
5429d6a9 1171 'skip': 'not actual anymore',
5caabd3c 1172 },
1173 {
822b9d9c 1174 # Youtube Music Auto-generated description
5caabd3c 1175 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1176 'info_dict': {
1177 'id': 'MgNrAu2pzNs',
1178 'ext': 'mp4',
1179 'title': 'Voyeur Girl',
1180 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1181 'upload_date': '20190312',
5429d6a9
S
1182 'uploader': 'Stephen - Topic',
1183 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1184 'artist': 'Stephen',
1185 'track': 'Voyeur Girl',
1186 'album': 'it\'s too much love to know my dear',
1187 'release_date': '20190313',
1188 'release_year': 2019,
1189 },
1190 'params': {
1191 'skip_download': True,
1192 },
1193 },
66b48727
RA
1194 {
1195 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1196 'only_matching': True,
1197 },
011e75e6
S
1198 {
1199 # invalid -> valid video id redirection
1200 'url': 'DJztXj2GPfl',
1201 'info_dict': {
1202 'id': 'DJztXj2GPfk',
1203 'ext': 'mp4',
1204 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1205 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1206 'upload_date': '20090125',
1207 'uploader': 'Prochorowka',
1208 'uploader_id': 'Prochorowka',
1209 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1210 'artist': 'Panjabi MC',
1211 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1212 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1213 },
1214 'params': {
1215 'skip_download': True,
1216 },
545cc85d 1217 'skip': 'Video unavailable',
ea74e00b
DP
1218 },
1219 {
1220 # empty description results in an empty string
1221 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1222 'info_dict': {
1223 'id': 'x41yOUIvK2k',
1224 'ext': 'mp4',
1225 'title': 'IMG 3456',
1226 'description': '',
1227 'upload_date': '20170613',
1228 'uploader_id': 'ElevageOrVert',
1229 'uploader': 'ElevageOrVert',
1230 },
1231 'params': {
1232 'skip_download': True,
1233 },
1234 },
a0566bbf 1235 {
29f7c58a 1236 # with '};' inside yt initial data (see [1])
1237 # see [2] for an example with '};' inside ytInitialPlayerResponse
1238 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1239 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1240 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1241 'info_dict': {
1242 'id': 'CHqg6qOn4no',
1243 'ext': 'mp4',
1244 'title': 'Part 77 Sort a list of simple types in c#',
1245 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1246 'upload_date': '20130831',
1247 'uploader_id': 'kudvenkat',
1248 'uploader': 'kudvenkat',
1249 },
1250 'params': {
1251 'skip_download': True,
1252 },
1253 },
29f7c58a 1254 {
1255 # another example of '};' in ytInitialData
1256 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1257 'only_matching': True,
1258 },
1259 {
1260 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1261 'only_matching': True,
1262 },
{
    # https://github.com/ytdl-org/youtube-dl/pull/28094
    'url': 'OtqTfy26tG0',
    'info_dict': {
        'id': 'OtqTfy26tG0',
        'ext': 'mp4',
        'title': 'Burn Out',
        'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
        'upload_date': '20141120',
        'uploader': 'The Cinematic Orchestra - Topic',
        'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
        'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
        'artist': 'The Cinematic Orchestra',
        'track': 'Burn Out',
        'album': 'Every Day',
        # The key was misspelled 'release_data', which is not a field any
        # other test entry uses, so the expectation could never fail.
        'release_date': None,
        'release_year': None,
    },
    'params': {
        'skip_download': True,
    },
},
bc2ca1bb 1285 {
1286 # controversial video, only works with bpctr when authenticated with cookies
1287 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1288 'only_matching': True,
1289 },
f7ad7160 1290 {
1291 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1292 'url': 'cBvYw8_A0vQ',
1293 'info_dict': {
1294 'id': 'cBvYw8_A0vQ',
1295 'ext': 'mp4',
1296 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1297 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1298 'upload_date': '20201120',
1299 'uploader': 'Walk around Japan',
1300 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1301 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1302 },
1303 'params': {
1304 'skip_download': True,
1305 },
0fb983f6 1306 }, {
1307 # Has multiple audio streams
1308 'url': 'WaOKSUlf4TM',
1309 'only_matching': True
f7ad7160 1310 },
120916da 1311 {
1312 # multiple subtitles with same lang_code
1313 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1314 'only_matching': True,
1315 },
2eb88d95
PH
1316 ]
1317
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; for every other URL the decision is delegated to the
    parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super(YoutubeIE, cls).suitable(url)
1327
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent class and start with empty caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # (player URL, signature cache id) -> signature function
    self._player_cache = {}
    # player id -> downloaded player code
    self._code_cache = {}
e0df6211 1332
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the lengths of the dot-separated parts of *example_sig*, joined by dots."""
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1336
e40c758c
S
1337 @classmethod
1338 def _extract_player_info(cls, player_url):
1339 for player_re in cls._PLAYER_INFO_RE:
1340 id_m = re.search(player_re, player_url)
1341 if id_m:
1342 break
1343 else:
c081b35c 1344 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1345 return id_m.group('id')
e40c758c
S
1346
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms an encrypted signature string.

    The filesystem cache ('youtube-sigfuncs') is consulted first; a hit is a
    list of indices and yields a function that picks those positions out of
    its argument. On a miss the player code is downloaded (once per player
    id), the signature function is parsed out of it, and the index list it
    produces for a probe string is stored in the cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    # The id is used as a cache key; it must not contain path components
    assert os.path.basename(func_id) == func_id

    cached_indices = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_indices is not None:
        return lambda s: ''.join(s[i] for i in cached_indices)

    if player_id not in self._code_cache:
        player_code = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
        self._code_cache[player_id] = player_code
    res = self._parse_sig_js(self._code_cache[player_id])

    # Run the function on chr(0)..chr(n-1) so that every output character
    # reveals the input position it was taken from
    probe = ''.join(compat_chr(pos) for pos in range(len(example_sig)))
    index_spec = list(map(ord, res(probe)))

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return res
1373
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    *func* is applied to a probe string made of chr(0)..chr(n-1), where n is
    len(example_sig); the positions it selects are rendered as a
    concatenation of ``s[...]`` index/slice expressions and written out
    with ``to_screen``.
    """
    def gen_sig_code(idxs):
        """Yield ``s[...]`` expressions reproducing the index list *idxs*,
        folding runs with a constant step of +1 or -1 into slices."""
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        if not idxs:
            # Nothing was selected, so there is nothing to describe
            return

        step = None
        # Quell pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        # Pre-bind i: with a single index there are no (i, prev) pairs, the
        # loop body never runs, and the code after the loop would otherwise
        # hit an unbound variable
        i = idxs[0]
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1412
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player code *jscode*.

    The function name is found with the first matching pattern below; the
    function itself is then extracted with JSInterpreter. Returns a callable
    taking the signature string and passing it to that function as its
    single argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # This pattern used to be listed twice in a row; the second copy
         # could never match where the first had not, so it was dropped.
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
1436
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    *player_url* may be protocol-relative or a path; it is made absolute
    before use. Signature functions are cached per (player URL, signature
    cache id). Raises ExtractorError when *player_url* is None or when
    anything goes wrong while obtaining or applying the function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif re.match(r'https?://', player_url) is None:
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        # The full traceback is embedded in the message of the raised error
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1463
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in *player_response*.

    Does nothing when the response carries no usable
    playbackTracking/videostatsPlaybackUrl/baseUrl. The request is
    non-fatal: it is issued with ``fatal=False``.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]
    cpn = ''.join(cpn_chars)

    # Override/add the two parameters, keeping everything else from the URL
    query['ver'] = ['2']
    query['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1488
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in the HTML text *webpage*.

    Returns a list (possibly empty) holding, in this order:
    HTML-unescaped player URLs from iframe/embed/object/SWF style embeds,
    HTML-unescaped ``data-youtube-id`` values of lazyYT elements, and
    ``data-video_id`` values of "YouTube Video Importer" player divs.
    """
    # Embedded YouTube player
    # The embedSWF alternative used to read `embedSWF\(?:\s*`, i.e. an
    # optional "(" followed by a mandatory literal ":", which cannot match
    # an actual `embedSWF("...")` call; it now matches "(" plus whitespace.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
1520
@staticmethod
def _extract_url(webpage):
    """Return the first entry produced by YoutubeIE._extract_urls, or None."""
    candidates = YoutubeIE._extract_urls(webpage)
    if candidates:
        return candidates[0]
    return None
1525
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second capture group of cls._VALID_URL matched against `url`.

    Raises ExtractorError when `url` does not match the pattern.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1533
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build the chapter list from the chaptered player bar in `data`.

    Returns None when `data` has no non-empty chapter list. Otherwise
    returns a list of dicts with 'start_time', 'end_time' and 'title'.
    A chapter ends where the next one starts; the last one ends at
    `duration`. Chapters whose start or end cannot be determined are
    left out.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']['playerOverlayRenderer']['decoratedPlayerBarRenderer']['decoratedPlayerBarRenderer']['playerBar']['chapteredPlayerBarRenderer']['chapters'],
        list)
    if not raw_chapters:
        return

    def start_seconds(item):
        # timeRangeStartMillis is in milliseconds; scale it down to seconds
        millis = try_get(
            item, lambda x: x['chapterRenderer']['timeRangeStartMillis'], int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for index, item in enumerate(raw_chapters):
        begin = start_seconds(item)
        if begin is None:
            continue
        following = index + 1
        if following < total:
            finish = start_seconds(raw_chapters[following])
        else:
            finish = duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                item, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1573
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search `webpage` for a JSON blob with `regex` and parse it.

    `regex` followed by self._YT_INITIAL_BOUNDARY_RE is tried first, then
    `regex` alone. When nothing matches, the string '{}' is parsed
    instead. Parsing is done with fatal=False.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex,
    )
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1578
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the result of datetime_from_str('now-<X><units>',
    precision='auto'), or None when time_text is empty/None or has fewer
    than three space-separated parts.
    """
    if not time_text:
        # A comment without publish-time runs yields None here; there is
        # nothing to split, so report "unknown" instead of raising.
        return None
    time_text_split = time_text.split(' ')
    if len(time_text_split) < 3:
        return None
    return datetime_from_str(
        'now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1588
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the non-empty 'text' strings of the dicts in `runs`.

    Entries that are not dicts, or whose 'text' is missing, empty or not
    a string, are ignored. Returns None when nothing was collected.
    """
    pieces = []
    for run in runs:
        if isinstance(run, dict):
            piece = try_get(run, lambda x: x['text'], compat_str)
            if piece:
                pieces.append(piece)
    if not pieces:
        return None
    return ''.join(pieces)
1602
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'. Otherwise returns
    a dict with the keys 'id', 'text', 'timestamp', 'time_text',
    'like_count', 'is_favorited', 'author', 'author_id',
    'author_thumbnail', 'author_is_uploader' and 'parent'
    (`parent`, or 'root' when no parent is given).

    'timestamp' is derived from the relative publish-time text via
    parse_time_text and is None when that text is missing or cannot be
    parsed.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)

    # _join_text_entries gives None when there are no time runs, and
    # parse_time_text gives None for text it cannot split; neither case
    # may reach .timetuple(), so the timestamp simply stays unknown.
    timestamp = None
    if time_text:
        published = self.parse_time_text(time_text)
        if published is not None:
            timestamp = calendar.timegm(published.timetuple())

    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                  lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_liked,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
1635
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generate comments by following comment continuations page by page.

    Yields comment dicts as built by _extract_comment. For a top-level
    call (parent is None) it may also yield one int: the estimated total
    number of comments read from the comments header of the first page.
    Replies are fetched by recursing with `parent` set to the id of the
    comment being replied to.

    session_token_list is a one-element list used as a mutable holder:
    its first item is sent as 'session_token' and is replaced whenever a
    response carries an 'xsrf_token'.
    comment_counts is a shared three-item list:
    [comments so far, estimated total comments, current comment thread #].

    Raises ExtractorError when a page cannot be fetched after the
    configured 'extractor_retries', or when the response signals an error.
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type, not a getter: passing it inside the
            # getter tuple would call dict(...) on the container and could
            # hand back a copy of the wrong object.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None

        # Retry loop: leaves via `break` with a usable `response`, via
        # `return` on HTTP 413, or by raising once retries are exhausted.
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                raise
            else:
                # Keep the session token fresh for the next request
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                    # The int is consumed by the caller as the estimated total
                    yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    # Restart from the sorted continuation instead of
                    # consuming this page's entries
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
1820
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
    """Entry for comment extraction

    Walks `contents`, and for the first 'itemSectionRenderer' found in
    each entry drains _comment_entries. Ints coming out of that generator
    are taken as the estimated total; everything else is collected as a
    comment. Returns a dict with 'comments' and 'comment_count'.
    """
    entry_renderer_names = (
        'itemSectionRenderer',
    )
    collected = []
    total_estimate = 0
    for entry in contents:
        for renderer_name, renderer in entry.items():
            if renderer_name not in entry_renderer_names:
                continue

            identity_token = self._extract_identity_token(webpage, item_id=video_id)
            account_syncid = self._extract_account_syncid(ytcfg)
            for item in self._comment_entries(
                    renderer,
                    identity_token=identity_token,
                    account_syncid=account_syncid,
                    ytcfg=ytcfg,
                    session_token_list=[xsrf_token]):
                if isinstance(item, int):
                    total_estimate = item
                else:
                    collected.append(item)
            # Only the first known renderer of an entry is processed
            break
    self.to_screen('Downloaded %d/%d comments' % (len(collected), total_estimate))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
1851
c5e8d7af 1852 def _real_extract(self, url):
cf7e015f 1853 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1854 video_id = self._match_id(url)
1855 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1856 webpage_url = base_url + 'watch?v=' + video_id
1857 webpage = self._download_webpage(
cce889b9 1858 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1859
1860 player_response = None
1861 if webpage:
1862 player_response = self._extract_yt_initial_variable(
1863 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1864 video_id, 'initial player response')
f4f751af 1865
1866 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1867 if not player_response:
1868 player_response = self._call_api(
f4f751af 1869 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1870
1871 playability_status = player_response.get('playabilityStatus') or {}
1872 if playability_status.get('reason') == 'Sign in to confirm your age':
1873 pr = self._parse_json(try_get(compat_parse_qs(
1874 self._download_webpage(
1875 base_url + 'get_video_info', video_id,
1876 'Refetching age-gated info webpage',
1877 'unable to download video info webpage', query={
1878 'video_id': video_id,
7c60c33e 1879 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1880 }, fatal=False)),
1881 lambda x: x['player_response'][0],
1882 compat_str) or '{}', video_id)
1883 if pr:
1884 player_response = pr
1885
1886 trailer_video_id = try_get(
1887 playability_status,
1888 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1889 compat_str)
1890 if trailer_video_id:
1891 return self.url_result(
1892 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1893
545cc85d 1894 def get_text(x):
1895 if not x:
c2d125d9 1896 return
f7ad7160 1897 text = x.get('simpleText')
1898 if text and isinstance(text, compat_str):
1899 return text
1900 runs = x.get('runs')
1901 if not isinstance(runs, list):
1902 return
1903 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
15be3eb5 1904
545cc85d 1905 search_meta = (
1906 lambda x: self._html_search_meta(x, webpage, default=None)) \
1907 if webpage else lambda x: None
dbdaaa23 1908
545cc85d 1909 video_details = player_response.get('videoDetails') or {}
37357d21 1910 microformat = try_get(
545cc85d 1911 player_response,
1912 lambda x: x['microformat']['playerMicroformatRenderer'],
1913 dict) or {}
1914 video_title = video_details.get('title') \
1915 or get_text(microformat.get('title')) \
1916 or search_meta(['og:title', 'twitter:title', 'title'])
1917 video_description = video_details.get('shortDescription')
cf7e015f 1918
8fe10494 1919 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1920 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1921 multifeed_metadata_list = try_get(
1922 player_response,
1923 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1924 compat_str)
8fe10494
S
1925 if multifeed_metadata_list:
1926 entries = []
1927 feed_ids = []
1928 for feed in multifeed_metadata_list.split(','):
1929 # Unquote should take place before split on comma (,) since textual
1930 # fields may contain comma as well (see
067aa17e 1931 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1932 feed_data = compat_parse_qs(
1933 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1934
1935 def feed_entry(name):
545cc85d 1936 return try_get(
1937 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1938
1939 feed_id = feed_entry('id')
1940 if not feed_id:
1941 continue
1942 feed_title = feed_entry('title')
1943 title = video_title
1944 if feed_title:
1945 title += ' (%s)' % feed_title
8fe10494
S
1946 entries.append({
1947 '_type': 'url_transparent',
1948 'ie_key': 'Youtube',
1949 'url': smuggle_url(
545cc85d 1950 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1951 {'force_singlefeed': True}),
6b09401b 1952 'title': title,
8fe10494 1953 })
6b09401b 1954 feed_ids.append(feed_id)
8fe10494
S
1955 self.to_screen(
1956 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1957 % (', '.join(feed_ids), video_id))
545cc85d 1958 return self.playlist_result(
1959 entries, video_id, video_title, video_description)
8fe10494
S
1960 else:
1961 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1962
545cc85d 1963 formats = []
1964 itags = []
cc2db878 1965 itag_qualities = {}
545cc85d 1966 player_url = None
dca3ff4a 1967 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
545cc85d 1968 streaming_data = player_response.get('streamingData') or {}
1969 streaming_formats = streaming_data.get('formats') or []
1970 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1971 for fmt in streaming_formats:
1972 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1973 continue
321bf820 1974
cc2db878 1975 itag = str_or_none(fmt.get('itag'))
1976 quality = fmt.get('quality')
1977 if itag and quality:
1978 itag_qualities[itag] = quality
1979 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1980 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1981 # number of fragment that would subsequently requested with (`&sq=N`)
1982 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1983 continue
1984
545cc85d 1985 fmt_url = fmt.get('url')
1986 if not fmt_url:
1987 sc = compat_parse_qs(fmt.get('signatureCipher'))
1988 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1989 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1990 if not (sc and fmt_url and encrypted_sig):
1991 continue
1992 if not player_url:
1993 if not webpage:
1994 continue
1995 player_url = self._search_regex(
1996 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1997 webpage, 'player URL', fatal=False)
1998 if not player_url:
201e9eaa 1999 continue
545cc85d 2000 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2001 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2002 fmt_url += '&' + sp + '=' + signature
2003
545cc85d 2004 if itag:
2005 itags.append(itag)
cc2db878 2006 tbr = float_or_none(
2007 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
0fb983f6 2008 audio_track = fmt.get('audioTrack') or {}
545cc85d 2009 dct = {
2010 'asr': int_or_none(fmt.get('audioSampleRate')),
2011 'filesize': int_or_none(fmt.get('contentLength')),
2012 'format_id': itag,
0fb983f6 2013 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2014 'fps': int_or_none(fmt.get('fps')),
2015 'height': int_or_none(fmt.get('height')),
dca3ff4a 2016 'quality': q(quality),
cc2db878 2017 'tbr': tbr,
545cc85d 2018 'url': fmt_url,
2019 'width': fmt.get('width'),
0fb983f6 2020 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2021 }
2022 mimetype = fmt.get('mimeType')
2023 if mimetype:
2024 mobj = re.match(
2025 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2026 if mobj:
2027 dct['ext'] = mimetype2ext(mobj.group(1))
2028 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2029 no_audio = dct.get('acodec') == 'none'
2030 no_video = dct.get('vcodec') == 'none'
2031 if no_audio:
2032 dct['vbr'] = tbr
2033 if no_video:
2034 dct['abr'] = tbr
2035 if no_audio or no_video:
545cc85d 2036 dct['downloader_options'] = {
2037 # Youtube throttles chunks >~10M
2038 'http_chunk_size': 10485760,
bf1317d2 2039 }
7c60c33e 2040 if dct.get('ext'):
2041 dct['container'] = dct['ext'] + '_dash'
545cc85d 2042 formats.append(dct)
2043
2044 hls_manifest_url = streaming_data.get('hlsManifestUrl')
2045 if hls_manifest_url:
2046 for f in self._extract_m3u8_formats(
2047 hls_manifest_url, video_id, 'mp4', fatal=False):
2048 itag = self._search_regex(
2049 r'/itag/(\d+)', f['url'], 'itag', default=None)
2050 if itag:
2051 f['format_id'] = itag
2052 formats.append(f)
2053
1418a043 2054 if self._downloader.params.get('youtube_include_dash_manifest', True):
545cc85d 2055 dash_manifest_url = streaming_data.get('dashManifestUrl')
2056 if dash_manifest_url:
545cc85d 2057 for f in self._extract_mpd_formats(
2058 dash_manifest_url, video_id, fatal=False):
cc2db878 2059 itag = f['format_id']
2060 if itag in itags:
2061 continue
dca3ff4a 2062 if itag in itag_qualities:
2063 # Not actually usefull since the sorting is already done with "quality,res,fps,codec"
2064 # but kept to maintain feature parity (and code similarity) with youtube-dl
2065 # Remove if this causes any issues with sorting in future
2066 f['quality'] = q(itag_qualities[itag])
545cc85d 2067 filesize = int_or_none(self._search_regex(
2068 r'/clen/(\d+)', f.get('fragment_base_url')
2069 or f['url'], 'file size', default=None))
2070 if filesize:
2071 f['filesize'] = filesize
cc2db878 2072 formats.append(f)
bf1317d2 2073
545cc85d 2074 if not formats:
63ad4d43 2075 if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2076 self.raise_no_formats(
545cc85d 2077 'This video is DRM protected.', expected=True)
2078 pemr = try_get(
2079 playability_status,
2080 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2081 dict) or {}
2082 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2083 subreason = pemr.get('subreason')
2084 if subreason:
2085 subreason = clean_html(get_text(subreason))
2086 if subreason == 'The uploader has not made this video available in your country.':
2087 countries = microformat.get('availableCountries')
2088 if not countries:
2089 regions_allowed = search_meta('regionsAllowed')
2090 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2091 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2092 reason += '\n' + subreason
2093 if reason:
b7da73eb 2094 self.raise_no_formats(reason, expected=True)
bf1317d2 2095
545cc85d 2096 self._sort_formats(formats)
bf1317d2 2097
545cc85d 2098 keywords = video_details.get('keywords') or []
2099 if not keywords and webpage:
2100 keywords = [
2101 unescapeHTML(m.group('content'))
2102 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2103 for keyword in keywords:
2104 if keyword.startswith('yt:stretch='):
201c1459 2105 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2106 if mobj:
2107 # NB: float is intentional for forcing float division
2108 w, h = (float(v) for v in mobj.groups())
2109 if w > 0 and h > 0:
2110 ratio = w / h
2111 for f in formats:
2112 if f.get('vcodec') != 'none':
2113 f['stretched_ratio'] = ratio
2114 break
6449cd80 2115
545cc85d 2116 thumbnails = []
2117 for container in (video_details, microformat):
2118 for thumbnail in (try_get(
2119 container,
2120 lambda x: x['thumbnail']['thumbnails'], list) or []):
2121 thumbnail_url = thumbnail.get('url')
2122 if not thumbnail_url:
bf1317d2 2123 continue
1988fab7 2124 # Sometimes youtube gives a wrong thumbnail URL. See:
2125 # https://github.com/yt-dlp/yt-dlp/issues/233
2126 # https://github.com/ytdl-org/youtube-dl/issues/28023
2127 if 'maxresdefault' in thumbnail_url:
2128 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2129 thumbnails.append({
2130 'height': int_or_none(thumbnail.get('height')),
2131 'url': thumbnail_url,
2132 'width': int_or_none(thumbnail.get('width')),
2133 })
2134 if thumbnails:
2135 break
a6211d23 2136 else:
545cc85d 2137 thumbnail = search_meta(['og:image', 'twitter:image'])
2138 if thumbnail:
2139 thumbnails = [{'url': thumbnail}]
2140
2141 category = microformat.get('category') or search_meta('genre')
2142 channel_id = video_details.get('channelId') \
2143 or microformat.get('externalChannelId') \
2144 or search_meta('channelId')
2145 duration = int_or_none(
2146 video_details.get('lengthSeconds')
2147 or microformat.get('lengthSeconds')) \
2148 or parse_duration(search_meta('duration'))
2149 is_live = video_details.get('isLive')
2150 owner_profile_url = microformat.get('ownerProfileUrl')
2151
2152 info = {
2153 'id': video_id,
2154 'title': self._live_title(video_title) if is_live else video_title,
2155 'formats': formats,
2156 'thumbnails': thumbnails,
2157 'description': video_description,
2158 'upload_date': unified_strdate(
2159 microformat.get('uploadDate')
2160 or search_meta('uploadDate')),
2161 'uploader': video_details['author'],
2162 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2163 'uploader_url': owner_profile_url,
2164 'channel_id': channel_id,
2165 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2166 'duration': duration,
2167 'view_count': int_or_none(
2168 video_details.get('viewCount')
2169 or microformat.get('viewCount')
2170 or search_meta('interactionCount')),
2171 'average_rating': float_or_none(video_details.get('averageRating')),
2172 'age_limit': 18 if (
2173 microformat.get('isFamilySafe') is False
2174 or search_meta('isFamilyFriendly') == 'false'
2175 or search_meta('og:restrictions:age') == '18+') else 0,
2176 'webpage_url': webpage_url,
2177 'categories': [category] if category else None,
2178 'tags': keywords,
2179 'is_live': is_live,
2180 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2181 'was_live': video_details.get('isLiveContent'),
545cc85d 2182 }
b477fc13 2183
545cc85d 2184 pctr = try_get(
2185 player_response,
2186 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2187 subtitles = {}
2188 if pctr:
774d79cc 2189 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2190 lang_subs = container.setdefault(lang_code, [])
545cc85d 2191 for fmt in self._SUBTITLE_FORMATS:
2192 query.update({
2193 'fmt': fmt,
2194 })
2195 lang_subs.append({
2196 'ext': fmt,
2197 'url': update_url_query(base_url, query),
774d79cc 2198 'name': sub_name,
545cc85d 2199 })
7e72694b 2200
545cc85d 2201 for caption_track in (pctr.get('captionTracks') or []):
2202 base_url = caption_track.get('baseUrl')
2203 if not base_url:
2204 continue
2205 if caption_track.get('kind') != 'asr':
120916da 2206 lang_code = (
2207 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2208 or caption_track.get('languageCode'))
545cc85d 2209 if not lang_code:
2210 continue
2211 process_language(
774d79cc 2212 subtitles, base_url, lang_code,
2213 try_get(caption_track, lambda x: x.get('name').get('simpleText')),
2214 {})
545cc85d 2215 continue
2216 automatic_captions = {}
2217 for translation_language in (pctr.get('translationLanguages') or []):
2218 translation_language_code = translation_language.get('languageCode')
2219 if not translation_language_code:
2220 continue
2221 process_language(
2222 automatic_captions, base_url, translation_language_code,
774d79cc 2223 try_get(translation_language, lambda x: x['languageName']['simpleText']),
545cc85d 2224 {'tlang': translation_language_code})
2225 info['automatic_captions'] = automatic_captions
2226 info['subtitles'] = subtitles
7e72694b 2227
545cc85d 2228 parsed_url = compat_urllib_parse_urlparse(url)
2229 for component in [parsed_url.fragment, parsed_url.query]:
2230 query = compat_parse_qs(component)
2231 for k, v in query.items():
2232 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2233 d_k += '_time'
2234 if d_k not in info and k in s_ks:
2235 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2236
2237 # Youtube Music Auto-generated description
822b9d9c 2238 if video_description:
38d70284 2239 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2240 if mobj:
822b9d9c
RA
2241 release_year = mobj.group('release_year')
2242 release_date = mobj.group('release_date')
2243 if release_date:
2244 release_date = release_date.replace('-', '')
2245 if not release_year:
545cc85d 2246 release_year = release_date[:4]
2247 info.update({
2248 'album': mobj.group('album'.strip()),
2249 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2250 'track': mobj.group('track').strip(),
2251 'release_date': release_date,
cc2db878 2252 'release_year': int_or_none(release_year),
545cc85d 2253 })
7e72694b 2254
545cc85d 2255 initial_data = None
2256 if webpage:
2257 initial_data = self._extract_yt_initial_variable(
2258 webpage, self._YT_INITIAL_DATA_RE, video_id,
2259 'yt initial data')
2260 if not initial_data:
2261 initial_data = self._call_api(
f4f751af 2262 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2263
2264 if not is_live:
2265 try:
2266 # This will error if there is no livechat
2267 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2268 info['subtitles']['live_chat'] = [{
394dcd44 2269 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2270 'video_id': video_id,
2271 'ext': 'json',
2272 'protocol': 'youtube_live_chat_replay',
2273 }]
2274 except (KeyError, IndexError, TypeError):
2275 pass
2276
2277 if initial_data:
2278 chapters = self._extract_chapters_from_json(
2279 initial_data, video_id, duration)
2280 if not chapters:
2281 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2282 contents = try_get(
2283 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2284 list)
2285 if not contents:
2286 continue
2287
2288 def chapter_time(mmlir):
2289 return parse_duration(
2290 get_text(mmlir.get('timeDescription')))
2291
2292 chapters = []
2293 for next_num, content in enumerate(contents, start=1):
2294 mmlir = content.get('macroMarkersListItemRenderer') or {}
2295 start_time = chapter_time(mmlir)
2296 end_time = chapter_time(try_get(
2297 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2298 if next_num < len(contents) else duration
2299 if start_time is None or end_time is None:
2300 continue
2301 chapters.append({
2302 'start_time': start_time,
2303 'end_time': end_time,
2304 'title': get_text(mmlir.get('title')),
2305 })
2306 if chapters:
2307 break
2308 if chapters:
2309 info['chapters'] = chapters
2310
2311 contents = try_get(
2312 initial_data,
2313 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2314 list) or []
2315 for content in contents:
2316 vpir = content.get('videoPrimaryInfoRenderer')
2317 if vpir:
2318 stl = vpir.get('superTitleLink')
2319 if stl:
2320 stl = get_text(stl)
2321 if try_get(
2322 vpir,
2323 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2324 info['location'] = stl
2325 else:
2326 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2327 if mobj:
2328 info.update({
2329 'series': mobj.group(1),
2330 'season_number': int(mobj.group(2)),
2331 'episode_number': int(mobj.group(3)),
2332 })
2333 for tlb in (try_get(
2334 vpir,
2335 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2336 list) or []):
2337 tbr = tlb.get('toggleButtonRenderer') or {}
2338 for getter, regex in [(
2339 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2340 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2341 lambda x: x['accessibility'],
2342 lambda x: x['accessibilityData']['accessibilityData'],
2343 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2344 label = (try_get(tbr, getter, dict) or {}).get('label')
2345 if label:
2346 mobj = re.match(regex, label)
2347 if mobj:
2348 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2349 break
2350 sbr_tooltip = try_get(
2351 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2352 if sbr_tooltip:
2353 like_count, dislike_count = sbr_tooltip.split(' / ')
2354 info.update({
2355 'like_count': str_to_int(like_count),
2356 'dislike_count': str_to_int(dislike_count),
2357 })
2358 vsir = content.get('videoSecondaryInfoRenderer')
2359 if vsir:
2360 info['channel'] = get_text(try_get(
2361 vsir,
2362 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2363 dict))
545cc85d 2364 rows = try_get(
2365 vsir,
2366 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2367 list) or []
2368 multiple_songs = False
2369 for row in rows:
2370 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2371 multiple_songs = True
2372 break
2373 for row in rows:
2374 mrr = row.get('metadataRowRenderer') or {}
2375 mrr_title = mrr.get('title')
2376 if not mrr_title:
2377 continue
2378 mrr_title = get_text(mrr['title'])
2379 mrr_contents_text = get_text(mrr['contents'][0])
2380 if mrr_title == 'License':
2381 info['license'] = mrr_contents_text
2382 elif not multiple_songs:
2383 if mrr_title == 'Album':
2384 info['album'] = mrr_contents_text
2385 elif mrr_title == 'Artist':
2386 info['artist'] = mrr_contents_text
2387 elif mrr_title == 'Song':
2388 info['track'] = mrr_contents_text
2389
2390 fallbacks = {
2391 'channel': 'uploader',
2392 'channel_id': 'uploader_id',
2393 'channel_url': 'uploader_url',
2394 }
2395 for to, frm in fallbacks.items():
2396 if not info.get(to):
2397 info[to] = info.get(frm)
2398
2399 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2400 v = info.get(s_k)
2401 if v:
2402 info[d_k] = v
b84071c0 2403
c224251a
M
2404 is_private = bool_or_none(video_details.get('isPrivate'))
2405 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2406 is_membersonly = None
b28f8d24 2407 is_premium = None
c224251a
M
2408 if initial_data and is_private is not None:
2409 is_membersonly = False
b28f8d24 2410 is_premium = False
c224251a
M
2411 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2412 for content in contents or []:
2413 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2414 for badge in badges or []:
2415 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2416 if label.lower() == 'members only':
2417 is_membersonly = True
2418 break
b28f8d24
M
2419 elif label.lower() == 'premium':
2420 is_premium = True
2421 break
2422 if is_membersonly or is_premium:
c224251a
M
2423 break
2424
2425 # TODO: Add this for playlists
2426 info['availability'] = self._availability(
2427 is_private=is_private,
b28f8d24 2428 needs_premium=is_premium,
c224251a
M
2429 needs_subscription=is_membersonly,
2430 needs_auth=info['age_limit'] >= 18,
2431 is_unlisted=None if is_private is None else is_unlisted)
2432
06167fbb 2433 # get xsrf for annotations or comments
2434 get_annotations = self._downloader.params.get('writeannotations', False)
2435 get_comments = self._downloader.params.get('getcomments', False)
2436 if get_annotations or get_comments:
29f7c58a 2437 xsrf_token = None
545cc85d 2438 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2439 if ytcfg:
2440 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2441 if not xsrf_token:
2442 xsrf_token = self._search_regex(
2443 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2444 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2445
2446 # annotations
06167fbb 2447 if get_annotations:
64b6a4e9
RA
2448 invideo_url = try_get(
2449 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2450 if xsrf_token and invideo_url:
29f7c58a 2451 xsrf_field_name = None
2452 if ytcfg:
2453 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2454 if not xsrf_field_name:
2455 xsrf_field_name = self._search_regex(
2456 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2457 webpage, 'xsrf field name',
29f7c58a 2458 group='xsrf_field_name', default='session_token')
8a784c74 2459 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2460 self._proto_relative_url(invideo_url),
2461 video_id, note='Downloading annotations',
2462 errnote='Unable to download video annotations', fatal=False,
2463 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2464
277d6ff5 2465 if get_comments:
a1c5d2ca 2466 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2467
545cc85d 2468 self.mark_watched(video_id, player_response)
d77ab8e2 2469
545cc85d 2470 return info
c5e8d7af 2471
5f6a1245 2472
8bdd16b4 2473class YoutubeTabIE(YoutubeBaseInfoExtractor):
2474 IE_DESC = 'YouTube.com tab'
70d5c17b 2475 _VALID_URL = r'''(?x)
2476 https?://
2477 (?:\w+\.)?
2478 (?:
2479 youtube(?:kids)?\.com|
2480 invidio\.us
2481 )/
2482 (?:
2483 (?:channel|c|user)/|
2484 (?P<not_channel>
9ba5705a 2485 feed/|hashtag/|
70d5c17b 2486 (?:playlist|watch)\?.*?\blist=
2487 )|
29f7c58a 2488 (?!(?:%s)\b) # Direct URLs
70d5c17b 2489 )
2490 (?P<id>[^/?\#&]+)
2491 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2492 IE_NAME = 'youtube:tab'
2493
81127aa5 2494 _TESTS = [{
8bdd16b4 2495 # playlists, multipage
2496 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2497 'playlist_mincount': 94,
2498 'info_dict': {
2499 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2500 'title': 'Игорь Клейнер - Playlists',
2501 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2502 'uploader': 'Игорь Клейнер',
2503 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2504 },
2505 }, {
2506 # playlists, multipage, different order
2507 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2508 'playlist_mincount': 94,
2509 'info_dict': {
2510 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2511 'title': 'Игорь Клейнер - Playlists',
2512 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2513 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2514 'uploader': 'Игорь Клейнер',
8bdd16b4 2515 },
201c1459 2516 }, {
2517 # playlists, series
2518 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2519 'playlist_mincount': 5,
2520 'info_dict': {
2521 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2522 'title': '3Blue1Brown - Playlists',
2523 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2524 },
8bdd16b4 2525 }, {
2526 # playlists, singlepage
2527 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2528 'playlist_mincount': 4,
2529 'info_dict': {
2530 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2531 'title': 'ThirstForScience - Playlists',
2532 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2533 'uploader': 'ThirstForScience',
2534 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2535 }
2536 }, {
2537 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2538 'only_matching': True,
2539 }, {
2540 # basic, single video playlist
0e30a7b9 2541 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2542 'info_dict': {
0e30a7b9 2543 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2544 'uploader': 'Sergey M.',
2545 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2546 'title': 'youtube-dl public playlist',
81127aa5 2547 },
0e30a7b9 2548 'playlist_count': 1,
9291475f 2549 }, {
8bdd16b4 2550 # empty playlist
0e30a7b9 2551 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2552 'info_dict': {
0e30a7b9 2553 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2554 'uploader': 'Sergey M.',
2555 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2556 'title': 'youtube-dl empty playlist',
9291475f
PH
2557 },
2558 'playlist_count': 0,
2559 }, {
8bdd16b4 2560 # Home tab
2561 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2562 'info_dict': {
8bdd16b4 2563 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2564 'title': 'lex will - Home',
2565 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2566 'uploader': 'lex will',
2567 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2568 },
8bdd16b4 2569 'playlist_mincount': 2,
9291475f 2570 }, {
8bdd16b4 2571 # Videos tab
2572 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2573 'info_dict': {
8bdd16b4 2574 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2575 'title': 'lex will - Videos',
2576 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2577 'uploader': 'lex will',
2578 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2579 },
8bdd16b4 2580 'playlist_mincount': 975,
9291475f 2581 }, {
8bdd16b4 2582 # Videos tab, sorted by popular
2583 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2584 'info_dict': {
8bdd16b4 2585 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2586 'title': 'lex will - Videos',
2587 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2588 'uploader': 'lex will',
2589 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2590 },
8bdd16b4 2591 'playlist_mincount': 199,
9291475f 2592 }, {
8bdd16b4 2593 # Playlists tab
2594 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2595 'info_dict': {
8bdd16b4 2596 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2597 'title': 'lex will - Playlists',
2598 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2599 'uploader': 'lex will',
2600 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2601 },
8bdd16b4 2602 'playlist_mincount': 17,
ac7553d0 2603 }, {
8bdd16b4 2604 # Community tab
2605 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2606 'info_dict': {
8bdd16b4 2607 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2608 'title': 'lex will - Community',
2609 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2610 'uploader': 'lex will',
2611 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2612 },
2613 'playlist_mincount': 18,
87dadd45 2614 }, {
8bdd16b4 2615 # Channels tab
2616 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2617 'info_dict': {
8bdd16b4 2618 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2619 'title': 'lex will - Channels',
2620 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2621 'uploader': 'lex will',
2622 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2623 },
deaec5af 2624 'playlist_mincount': 12,
6b08cdf6 2625 }, {
a0566bbf 2626 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2627 'only_matching': True,
2628 }, {
a0566bbf 2629 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2630 'only_matching': True,
2631 }, {
a0566bbf 2632 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2633 'only_matching': True,
2634 }, {
2635 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2636 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2637 'info_dict': {
2638 'title': '29C3: Not my department',
2639 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2640 'uploader': 'Christiaan008',
2641 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2642 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2643 },
2644 'playlist_count': 96,
2645 }, {
2646 'note': 'Large playlist',
2647 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2648 'info_dict': {
8bdd16b4 2649 'title': 'Uploads from Cauchemar',
2650 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2651 'uploader': 'Cauchemar',
2652 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2653 },
8bdd16b4 2654 'playlist_mincount': 1123,
2655 }, {
2656 # even larger playlist, 8832 videos
2657 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2658 'only_matching': True,
4b7df0d3
JMF
2659 }, {
2660 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2661 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2662 'info_dict': {
acf757f4
PH
2663 'title': 'Uploads from Interstellar Movie',
2664 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2665 'uploader': 'Interstellar Movie',
8bdd16b4 2666 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2667 },
481cc733 2668 'playlist_mincount': 21,
358de58c 2669 }, {
2670 'note': 'Playlist with "show unavailable videos" button',
2671 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2672 'info_dict': {
2673 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2674 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2675 'uploader': 'Phim Siêu Nhân Nhật Bản',
2676 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2677 },
2678 'playlist_mincount': 1400,
2679 'expected_warnings': [
2680 'YouTube said: INFO - Unavailable videos are hidden',
2681 ]
5d342002 2682 }, {
2683 'note': 'Playlist with unavailable videos in a later page',
2684 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2685 'info_dict': {
2686 'title': 'Uploads from BlankTV',
2687 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2688 'uploader': 'BlankTV',
2689 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2690 },
2691 'playlist_mincount': 20000,
8bdd16b4 2692 }, {
2693 # https://github.com/ytdl-org/youtube-dl/issues/21844
2694 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2695 'info_dict': {
2696 'title': 'Data Analysis with Dr Mike Pound',
2697 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2698 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2699 'uploader': 'Computerphile',
deaec5af 2700 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2701 },
2702 'playlist_mincount': 11,
2703 }, {
a0566bbf 2704 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2705 'only_matching': True,
dacb3a86
S
2706 }, {
2707 # Playlist URL that does not actually serve a playlist
2708 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2709 'info_dict': {
2710 'id': 'FqZTN594JQw',
2711 'ext': 'webm',
2712 'title': "Smiley's People 01 detective, Adventure Series, Action",
2713 'uploader': 'STREEM',
2714 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2715 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2716 'upload_date': '20150526',
2717 'license': 'Standard YouTube License',
2718 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2719 'categories': ['People & Blogs'],
2720 'tags': list,
dbdaaa23 2721 'view_count': int,
dacb3a86
S
2722 'like_count': int,
2723 'dislike_count': int,
2724 },
2725 'params': {
2726 'skip_download': True,
2727 },
13a75688 2728 'skip': 'This video is not available.',
dacb3a86 2729 'add_ie': [YoutubeIE.ie_key()],
481cc733 2730 }, {
8bdd16b4 2731 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2732 'only_matching': True,
66b48727 2733 }, {
8bdd16b4 2734 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2735 'only_matching': True,
a0566bbf 2736 }, {
2737 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2738 'info_dict': {
2739 'id': '9Auq9mYxFEE',
2740 'ext': 'mp4',
deaec5af 2741 'title': compat_str,
a0566bbf 2742 'uploader': 'Sky News',
2743 'uploader_id': 'skynews',
2744 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2745 'upload_date': '20191102',
deaec5af 2746 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2747 'categories': ['News & Politics'],
2748 'tags': list,
2749 'like_count': int,
2750 'dislike_count': int,
2751 },
2752 'params': {
2753 'skip_download': True,
2754 },
2755 }, {
2756 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2757 'info_dict': {
2758 'id': 'a48o2S1cPoo',
2759 'ext': 'mp4',
2760 'title': 'The Young Turks - Live Main Show',
2761 'uploader': 'The Young Turks',
2762 'uploader_id': 'TheYoungTurks',
2763 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2764 'upload_date': '20150715',
2765 'license': 'Standard YouTube License',
2766 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2767 'categories': ['News & Politics'],
2768 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2769 'like_count': int,
2770 'dislike_count': int,
2771 },
2772 'params': {
2773 'skip_download': True,
2774 },
2775 'only_matching': True,
2776 }, {
2777 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2778 'only_matching': True,
2779 }, {
2780 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2781 'only_matching': True,
3d3dddc9 2782 }, {
2783 'url': 'https://www.youtube.com/feed/trending',
2784 'only_matching': True,
2785 }, {
2786 # needs auth
2787 'url': 'https://www.youtube.com/feed/library',
2788 'only_matching': True,
2789 }, {
2790 # needs auth
2791 'url': 'https://www.youtube.com/feed/history',
2792 'only_matching': True,
2793 }, {
2794 # needs auth
2795 'url': 'https://www.youtube.com/feed/subscriptions',
2796 'only_matching': True,
2797 }, {
2798 # needs auth
2799 'url': 'https://www.youtube.com/feed/watch_later',
2800 'only_matching': True,
2801 }, {
2802 # no longer available?
2803 'url': 'https://www.youtube.com/feed/recommended',
2804 'only_matching': True,
29f7c58a 2805 }, {
2806 # inline playlist with not always working continuations
2807 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2808 'only_matching': True,
2809 }, {
2810 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2811 'only_matching': True,
2812 }, {
2813 'url': 'https://www.youtube.com/course',
2814 'only_matching': True,
2815 }, {
2816 'url': 'https://www.youtube.com/zsecurity',
2817 'only_matching': True,
2818 }, {
2819 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2820 'only_matching': True,
2821 }, {
2822 'url': 'https://www.youtube.com/TheYoungTurks/live',
2823 'only_matching': True,
39ed931e 2824 }, {
2825 'url': 'https://www.youtube.com/hashtag/cctv9',
2826 'info_dict': {
2827 'id': 'cctv9',
2828 'title': '#cctv9',
2829 },
2830 'playlist_mincount': 350,
201c1459 2831 }, {
2832 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2833 'only_matching': True,
29f7c58a 2834 }]
2835
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs that YoutubeIE accepts are always rejected here; every other
    URL is judged by the parent class implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2840
2841 def _extract_channel_id(self, webpage):
2842 channel_id = self._html_search_meta(
2843 'channelId', webpage, 'channel id', default=None)
2844 if channel_id:
2845 return channel_id
2846 channel_url = self._html_search_meta(
2847 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2848 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2849 'twitter:app:url:googleplay'), webpage, 'channel url')
2850 return self._search_regex(
2851 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2852 channel_url, 'channel id')
15f6397c 2853
8bdd16b4 2854 @staticmethod
cd7c66cf 2855 def _extract_basic_item_renderer(item):
2856 # Modified from _extract_grid_item_renderer
201c1459 2857 known_basic_renderers = (
2858 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 2859 )
2860 for key, renderer in item.items():
201c1459 2861 if not isinstance(renderer, dict):
cd7c66cf 2862 continue
201c1459 2863 elif key in known_basic_renderers:
2864 return renderer
2865 elif key.startswith('grid') and key.endswith('Renderer'):
2866 return renderer
8bdd16b4 2867
def _grid_entries(self, grid_renderer):
    """Yield one result per usable item in ``grid_renderer['items']``.

    Each item's renderer is classified, in order, as a playlist, a video,
    a channel or (as a last resort) a generic navigation endpoint URL.
    Items that are not dicts or carry no recognised renderer are skipped.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = try_get(
            renderer, (lambda x: x['title']['runs'][0]['text'],
                       lambda x: x['title']['simpleText']), compat_str)

        playlist_id = renderer.get('playlistId')
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel: only the simpleText form of the title is used here
            channel_id = renderer.get('channelId')
            channel_title = try_get(
                renderer, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=channel_title)
        else:
            # generic endpoint URL support: hand the URL to the first
            # extractor that claims it
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if not ep_url:
                continue
            for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if not ie.suitable(ep_url):
                    continue
                yield self.url_result(
                    ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                break
8bdd16b4 2910
3d3dddc9 2911 def _shelf_entries_from_content(self, shelf_renderer):
2912 content = shelf_renderer.get('content')
2913 if not isinstance(content, dict):
8bdd16b4 2914 return
cd7c66cf 2915 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 2916 if renderer:
2917 # TODO: add support for nested playlists so each shelf is processed
2918 # as separate playlist
2919 # TODO: this includes only first N items
2920 for entry in self._grid_entries(renderer):
2921 yield entry
2922 renderer = content.get('horizontalListRenderer')
2923 if renderer:
2924 # TODO
2925 pass
8bdd16b4 2926
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for one shelf.

    When the shelf has an endpoint URL, a URL result for it is yielded
    first (titled from the shelf's first title run). With *skip_channels*
    set, a shelf whose URL contains ``/channels?`` yields nothing at all.
    Entries found in the shelf content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str))
    # The shelf may not contain a shelf URL; fall back to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 2944
8bdd16b4 2945 def _playlist_entries(self, video_list_renderer):
2946 for content in video_list_renderer['contents']:
2947 if not isinstance(content, dict):
2948 continue
2949 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2950 if not isinstance(renderer, dict):
2951 continue
2952 video_id = renderer.get('videoId')
2953 if not video_id:
2954 continue
2955 yield self._extract_video(renderer)
07aeced6 2956
def _rich_entries(self, rich_grid_renderer):
    """Yield the video nested under ``content.videoRenderer``, if any.

    Nothing is yielded when that renderer is missing or has no videoId.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
2964
8bdd16b4 2965 def _video_entry(self, video_renderer):
2966 video_id = video_renderer.get('videoId')
2967 if video_id:
2968 return self._extract_video(video_renderer)
dacb3a86 2969
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by one backstage (community) post.

    In order: the attached video, the attached playlist, then every
    YouTube video linked in the post text (except a link to the attached
    video itself). Nothing is yielded if the post renderer is missing.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video was already yielded above
        if video_id == ep_video_id:
            continue
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 3005
8bdd16b4 3006 def _post_thread_continuation_entries(self, post_thread_continuation):
3007 contents = post_thread_continuation.get('contents')
3008 if not isinstance(contents, list):
3009 return
3010 for content in contents:
3011 renderer = content.get('backstagePostThreadRenderer')
3012 if not isinstance(renderer, dict):
3013 continue
3014 for entry in self._post_thread_entries(renderer):
3015 yield entry
07aeced6 3016
39ed931e 3017 r''' # unused
3018 def _rich_grid_entries(self, contents):
3019 for content in contents:
3020 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3021 if video_renderer:
3022 entry = self._video_entry(video_renderer)
3023 if entry:
3024 yield entry
3025 '''
3026
29f7c58a 3027 @staticmethod
3028 def _build_continuation_query(continuation, ctp=None):
3029 query = {
3030 'ctoken': continuation,
3031 'continuation': continuation,
3032 }
3033 if ctp:
3034 query['itct'] = ctp
3035 return query
3036
@staticmethod
def _extract_next_continuation_data(renderer):
    """Build a continuation query from ``continuations[0].nextContinuationData``.

    Returns None when that structure, or its ``continuation`` token, is
    missing or empty.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = data.get('continuation')
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, data.get('clickTrackingParams'))
c5e8d7af 3048
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    ``nextContinuationData`` is tried first. Failing that, the renderer's
    ``contents`` and then ``items`` lists are scanned for the first
    ``continuationItemRenderer`` whose endpoint carries a token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    for list_key in ('contents', 'items'):
        for content in try_get(renderer, lambda x: x[list_key], list) or []:
            if not isinstance(content, dict):
                continue
            endpoint = try_get(
                content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                dict)
            if not endpoint:
                continue
            token = try_get(
                endpoint, lambda x: x['continuationCommand']['token'], compat_str)
            if token:
                return YoutubeTabIE._build_continuation_query(
                    token, endpoint.get('clickTrackingParams'))
    return None
448830ce 3071
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of *tab*, following continuations page by page.

    tab             -- tab renderer dict; its ``content`` holds either a
                       ``sectionListRenderer`` or a ``richGridRenderer``,
                       and its ``title`` decides whether channel shelves
                       are skipped
    item_id         -- id used to label each continuation request
    identity_token, account_syncid
                    -- forwarded to _generate_api_headers
    ytcfg           -- forwarded to _extract_context, _generate_api_headers
                       and _extract_response

    Nothing is yielded when the tab has no content dict. Paging stops when
    no continuation is found, a request returns nothing, or the response
    contains no recognised renderer.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first recognised renderer of each item is used
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list so extract_entries can hand back the continuation
    # it found (Python 2 does not support nonlocal)
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # 'itct' is only present when the continuation carried click
        # tracking params, so it must not be indexed unconditionally
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response has none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset so a stale continuation from the previous page is not reused
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Each value is (entry generator, key the generator reads its items from)
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The first item decides how the whole list is interpreted
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3191
@staticmethod
def _extract_selected_tab(tabs):
    """Return the ``tabRenderer`` dict of the first tab marked as selected.

    Raises ExtractorError when no entry of ``tabs`` carries a true boolean
    ``tabRenderer.selected`` value.
    """
    for candidate in tabs:
        is_selected = try_get(
            candidate, lambda x: x['tabRenderer']['selected'], bool)
        if not is_selected:
            continue
        return candidate['tabRenderer']
    raise ExtractorError('Unable to find selected tab')
b82f815f 3199
@staticmethod
def _extract_uploader(data):
    """Collect uploader details from the playlist sidebar of ``data``.

    Returns a dict holding whichever of 'uploader', 'uploader_id' and
    'uploader_url' could be found; keys whose value is None are dropped.
    """
    found = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner_run = try_get(
            secondary_info,
            lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner_run:
            continue
        owner_id = try_get(
            owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        owner_path = try_get(
            owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        found.update({
            'uploader': owner_run.get('text'),
            'uploader_id': owner_id,
            'uploader_url': urljoin('https://www.youtube.com/', owner_path),
        })
    return dict((key, value) for key, value in found.items() if value is not None)
8bdd16b4 3222
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a page that is organised in tabs.

    Metadata is read from the channel metadata renderer when present and
    from the playlist metadata renderer otherwise; the entries come from
    the currently selected tab.
    """
    channel_name = channel_url = channel_id = None
    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []

    selected_tab = self._extract_selected_tab(tabs)

    page_metadata = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if not page_metadata:
        page_metadata = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
    else:
        channel_name = page_metadata.get('title')
        channel_url = page_metadata.get('channelUrl')
        channel_id = page_metadata.get('externalId')

    if page_metadata:
        title = page_metadata.get('title')
        description = page_metadata.get('description', '')
        # Stays None for non-channel pages and is replaced by item_id below
        playlist_id = channel_id
        tags = page_metadata.get('keywords', '').split()
        raw_thumbnails = try_get(
            page_metadata, lambda x: x['avatar']['thumbnails'], list)
        if not raw_thumbnails:
            raw_thumbnails = try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list) or []

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if not thumb_url:
            continue
        thumbnails.append({
            'url': thumb_url,
            'width': int_or_none(thumb.get('width')),
            'height': int_or_none(thumb.get('height')),
        })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata: fall back to the playlist sidebar owner
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly updated) uploader fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id,
        self._extract_identity_token(webpage, item_id),
        self._extract_account_syncid(data),
        self._extract_ytcfg(item_id, webpage))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3294
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a Mix playlist, paging through the 'next' endpoint.

    ``playlist`` is the playlist panel taken from the watch page; every
    further page is requested starting from the last video seen. The
    generator stops when a page has no videos, when a page adds nothing
    after the last yielded video, when the first video shows up again
    (Mixes loop around), or when a response carries no playlist panel.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        if not playlist:
            # The previous response had no playlist panel; nothing left to page through
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last panel entry may not be a video renderer; fall back to an
        # empty dict so the defaults in the query below are used instead
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 3333
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a playlist panel found on a watch page.

    Regular playlists are delegated to their canonical tab-based URL;
    only Mix playlists (which have no such URL) are extracted here.
    """
    playlist_title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    resolved_id = playlist.get('playlistId') or item_id

    canonical_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    canonical_url = urljoin(url, canonical_path)

    if not canonical_url or canonical_url == url:
        # No separate playlist page to delegate to: treat it as a Mix
        return self.playlist_result(
            self._extract_mix_playlist(playlist, resolved_id, data, webpage),
            playlist_id=resolved_id, playlist_title=playlist_title)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    return self.url_result(
        canonical_url, ie=YoutubeTabIE.ie_key(), video_id=resolved_id,
        video_title=playlist_title)
c5e8d7af 3351
f3eaa8dd
M
def _extract_alerts(self, data, expected=False):
    """Report the alerts contained in ``data`` and raise on errors.

    Alerts whose type is 'error' (case-insensitive) are errors; all other
    alerts are warnings. Every warning and every error except the last
    one is reported through ``report_warning``; the last error is raised
    as an ExtractorError with the given ``expected`` flag.
    """

    def _real_extract_alerts():
        """Yield one (type, message) pair per alert that has a type and text."""
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
                if not message:
                    # No simple text: assemble the message from its runs,
                    # ignoring runs that carry no text
                    runs = try_get(alert, lambda x: x['text']['runs'], list) or []
                    message = ''.join(
                        try_get(run, lambda x: x['text'], compat_str) or ''
                        for run in runs)
                if message:
                    yield alert_type, message

    errors = []
    warnings = []
    for alert_type, alert_message in _real_extract_alerts():
        if alert_type.lower() == 'error':
            errors.append([alert_type, alert_message])
        else:
            warnings.append([alert_type, alert_message])

    for alert_type, alert_message in (warnings + errors[:-1]):
        self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if errors:
        raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3382
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns the API response, or None when ``data`` has no playlist
    sidebar or the request fails (the request is made non-fatally).
    """
    sidebar_renderer = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_renderer:
        return
    browse_id = params = None
    for item in sidebar_renderer:
        if not isinstance(item, dict):
            continue
        renderer = item.get('playlistSidebarPrimaryInfoRenderer')
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    # The other callers in this class look the sync id up in the page data;
    # do the same here and keep the ytcfg lookup as a fallback
    account_syncid = (
        self._extract_account_syncid(data)
        or self._extract_account_syncid(ytcfg))
    headers = self._generate_api_headers(
        ytcfg, account_syncid=account_syncid,
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    # Defaults used when the page offers no 'show unavailable videos' button
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3426
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint ``ep``, retrying on flaky or incomplete answers.

    A request is retried (up to the 'extractor_retries' parameter) on HTTP
    500/503/404 and when none of ``check_get_keys`` is present in the
    response. Once the retries are used up, or on any other extractor
    error, the failure is raised if ``fatal`` is true; otherwise a
    warning is reported and None is returned.
    """
    max_retries = self._downloader.params.get('extractor_retries', 3)
    required_keys = [] if check_get_keys is None else check_get_keys
    response = None
    pending_error = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        if pending_error:
            self.report_warning('%s. Retrying ...' % pending_error)
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, ' (retry #%d)' % attempt if attempt else ''))
        except ExtractorError as err:
            http_code = err.cause.code if isinstance(err.cause, compat_HTTPError) else None
            if http_code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                pending_error = 'HTTP Error %s' % http_code
                if attempt < max_retries:
                    continue
            if not fatal:
                self.report_warning(error_to_compat_str(err))
                return
            raise
        else:
            # Youtube may send alerts if there was an issue with the continuation page
            self._extract_alerts(response, expected=False)
            if not required_keys or dict_get(response, required_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            pending_error = 'Incomplete data received'
            if attempt < max_retries:
                continue
            if fatal:
                raise ExtractorError(pending_error)
            self.report_warning(pending_error)
            return
    return response
3474
def _extract_webpage(self, url, item_id):
    """Download ``url`` and return ``(webpage, initial_data)``.

    The download is repeated (up to the 'extractor_retries' parameter)
    while the initial data has neither 'contents' nor
    'currentVideoEndpoint'; an ExtractorError is raised once the retries
    are used up.
    """
    max_retries = self._downloader.params.get('extractor_retries', 3)
    failure_note = 'Incomplete yt initial data recieved'
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        retry_suffix = ''
        if attempt:
            self.report_warning('%s. Retrying ...' % failure_note)
            retry_suffix = ' (retry #%d)' % attempt
        webpage = self._download_webpage(
            url, item_id, 'Downloading webpage%s' % retry_suffix)
        data = self._extract_yt_initial_data(item_id, webpage)
        self._extract_alerts(data, expected=True)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        if attempt >= max_retries:
            raise ExtractorError(failure_note)
    return webpage, data
3495
def _real_extract(self, url):
    """Extract a channel, playlist or feed page, or fall back to one video.

    Bare channel/user URLs are redirected to their "/videos" tab, watch
    URLs without a video id are turned into playlist URLs, and
    --no-playlist is honoured when the URL names both a video and a
    playlist.
    """
    item_id = self._match_id(url)
    # Always talk to www.youtube.com, whatever host the URL was given with
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self._downloader.params.get('compat_opts', [])

    # This is not matched in a channel page with a tab selected
    bare_match = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    url_parts = bare_match.groupdict() if bare_match else {}
    redirect_allowed = 'no-youtube-channel-redirect' not in compat_opts
    if url_parts and not url_parts.get('not_channel') and redirect_allowed:
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (url_parts.get('pre'), url_parts.get('post') or '')

    # Handle both video/playlist URLs
    query_args = parse_qs(url)
    video_id = query_args.get('v', [None])[0]
    playlist_id = query_args.get('list', [None])[0]

    is_watch_url = (url_parts.get('not_channel') or '').startswith('watch')
    if is_watch_url and not video_id:
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        reloaded = self._reload_with_unavailable_videos(item_id, data, webpage)
        data = reloaded or data

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Neither tabs nor a playlist panel: fall back to the single video, if any
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')
    self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 3554
c5e8d7af 3555
class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and "list=" URLs and hand them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that YoutubeTabIE handles or that carry a video id."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the canonical playlist page, keeping the query string."""
        playlist_id = self._match_id(url)
        # A bare playlist id has no query string; build one from the id
        query_args = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query_args)
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3637
3638
class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be short links that carry a playlist into watch URLs."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rebuild the full watch URL and delegate it to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3677
3678
class YoutubeYtUserIE(InfoExtractor):
    """Map the "ytuser:NAME" keyword to the user's channel URL."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the user's page to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3692
b05654f0 3693
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ":ytfav" keyword to the liked-videos playlist ("LL")."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the liked-videos playlist to YoutubeTabIE."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
3711
3712
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor for the "ytsearch" keyword, backed by the search API."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` videos for ``query``, following continuations."""
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        for page_num in itertools.count(1):
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # The first page and the continuation pages keep their sections
            # under different paths; try both
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or []:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                break
            request['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
75dff0ee 3782
c9ae7b95 3783
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE for the "ytsearchdate" keyword."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' field of every search request
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3789
c9ae7b95 3790
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Extract the results of a youtube.com/results search URL."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own URL pattern."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query and the 'sp' filter from the URL and run the search."""
        url_args = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = url_args.get('search_query') or url_args.get('q')
        query = search_terms[0]
        # The 'sp' value is forwarded as the search 'params'
        self._SEARCH_PARAMS = url_args.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3816
3817
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    Each subclass has to provide the _FEED_NAME property.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before extracting."""
        self._login()

    def _real_extract(self, url):
        """Delegate the feed page to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3837
3838
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ":ytwatchlater" keyword to the watch-later playlist ("WL")."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the watch-later playlist to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3851
3852
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "recommended" feed (":ytrec" or the bare home page URL)."""

    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3867
1ed5b5c9 3868
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "subscriptions" feed (":ytsubs")."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3880
1ed5b5c9 3881
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "history" feed (":ythis" / ":ythistory")."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3890
3891
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their video id and explain the likely cause."""

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a quoting hint."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3939
3940
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than 11 characters."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id."""
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)