]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube:tab] Support youtube music `MP` pages
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Return the query string of *url* parsed into a dict of value lists."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the login form plus the JSON-encoded f_req payload; return parsed JSON or False."""
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything before the first '[' so the remainder parses as JSON
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self.report_warning(message)

        # Sent verbatim inside both the lookup and the challenge payloads
        signin_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 signin_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, signin_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised: '%' binds tighter than the conditional expression,
            # so without the parentheses the prefix is lost for other messages
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence fix as for the password warning
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _initialize_consent(self):
        """Set an accepting CONSENT cookie for .youtube.com unless already signed in or consented."""
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            # Reuse the numeric id of a pending consent cookie when there is one
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Prepare the consent cookie and, when a downloader is attached, attempt to log in."""
        self._initialize_consent()
        if self._downloader is None:
            return
        # The login result is intentionally not acted upon here
        self._login()

    _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
    _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """Return a 'SAPISIDHASH <time>_<sha1>' value built from the SAPISID cookie, or None without it."""
        sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
        return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None):
        """
        POST query (merged with the client context) as JSON to the youtubei/v1 endpoint ep.

        @param context  context to send; falls back to _extract_context() when falsy
        @param api_key  API key to send; falls back to _extract_api_key() when falsy
        @param headers  extra headers applied on top of the generated API headers
        """
        data = {'context': context or self._extract_context()}
        data.update(query)
        real_headers = self._generate_api_headers()
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def _extract_api_key(self, ytcfg=None):
        """Return INNERTUBE_API_KEY from ytcfg, or the built-in default key."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY

    def _extract_yt_initial_data(self, video_id, webpage):
        """Locate the ytInitialData JSON object in webpage and return it parsed."""
        return self._parse_json(
            self._search_regex(
                # Try the pattern anchored by a trailing boundary first, then the bare one
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return ID_TOKEN from the page's ytcfg, else from a regex over webpage, else None."""
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(data):
        """
        Extract syncId required to download private playlists of secondary channels
        @param data Either response or ytcfg
        """
        sync_ids = (try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]
        # ytcfg includes channel_syncid if on secondary channel.
        # Looked up through try_get so that a None / non-dict data yields None
        # instead of raising, consistent with the lookup above
        return try_get(data, lambda x: x['DELEGATED_SESSION_ID'])

    def _extract_ytcfg(self, video_id, webpage):
        """Return the dict passed to ytcfg.set(...) in webpage, or {} when absent or unparsable."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def __extract_client_version(self, ytcfg):
        """Return INNERTUBE_CLIENT_VERSION from ytcfg, or the built-in default version."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION

    def _extract_context(self, ytcfg=None):
        """Return INNERTUBE_CONTEXT from ytcfg, or a minimal client context rebuilt from its fields."""
        context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
        if context:
            return context

        # Recreate the client context (required)
        client_version = self.__extract_client_version(ytcfg)
        client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
        context = {
            'client': {
                'clientName': client_name,
                'clientVersion': client_version,
            }
        }
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
        """Build the request headers for API calls; optional headers are added only when their value is given."""
        headers = {
            'X-YouTube-Client-Name': '1',
            'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
            headers['X-Goog-AuthUser'] = 0
        if visitor_data:
            headers['x-goog-visitor-id'] = visitor_data
        # Authorization is only available when a SAPISID cookie is present
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = 'https://www.youtube.com'
        return headers

    @staticmethod
    def is_music_url(url):
        """Return True if url starts with http(s)://music.youtube.com/."""
        return re.match(r'https?://music\.youtube\.com/', url) is not None

    def _extract_video(self, renderer):
        """Convert a video renderer dict into a 'url' result handled by YoutubeIE."""
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is stripped first; only a leading run of digits/commas is taken
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
436
0c148415 437
360e1ca5 438class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 439 IE_DESC = 'YouTube.com'
bc2ca1bb 440 _INVIDIOUS_SITES = (
441 # invidious-redirect websites
442 r'(?:www\.)?redirect\.invidious\.io',
443 r'(?:(?:www|dev)\.)?invidio\.us',
444 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
445 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 446 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 447 r'(?:(?:www|au)\.)?ytprivate\.com',
448 r'(?:www\.)?invidious\.namazso\.eu',
449 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 450 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
451 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
452 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
453 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
454 # youtube-dl invidious instances list
455 r'(?:(?:www|no)\.)?invidiou\.sh',
456 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
457 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 458 r'(?:www\.)?invidious\.mastodon\.host',
459 r'(?:www\.)?invidious\.zapashcanon\.fr',
460 r'(?:www\.)?invidious\.kavin\.rocks',
201c1459 461 r'(?:www\.)?invidious\.tinfoil-hat\.net',
462 r'(?:www\.)?invidious\.himiko\.cloud',
463 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 464 r'(?:www\.)?invidious\.tube',
465 r'(?:www\.)?invidiou\.site',
466 r'(?:www\.)?invidious\.site',
467 r'(?:www\.)?invidious\.xyz',
468 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 469 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 470 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 471 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 472 r'(?:www\.)?tube\.poal\.co',
473 r'(?:www\.)?tube\.connect\.cafe',
474 r'(?:www\.)?vid\.wxzm\.sx',
475 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 476 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 477 r'(?:www\.)?yewtu\.be',
478 r'(?:www\.)?yt\.elukerio\.org',
479 r'(?:www\.)?yt\.lelux\.fi',
480 r'(?:www\.)?invidious\.ggc-project\.de',
481 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 482 r'(?:www\.)?ytprivate\.com',
483 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 484 r'(?:www\.)?invidious\.toot\.koeln',
485 r'(?:www\.)?invidious\.fdn\.fr',
486 r'(?:www\.)?watch\.nettohikari\.com',
487 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
488 r'(?:www\.)?qklhadlycap4cnod\.onion',
489 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
490 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
491 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
492 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
493 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
494 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
495 )
cb7dfeea 496 _VALID_URL = r"""(?x)^
c5e8d7af 497 (
edb53e2d 498 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 499 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
500 (?:www\.)?deturl\.com/www\.youtube\.com|
501 (?:www\.)?pwnyoutube\.com|
502 (?:www\.)?hooktube\.com|
503 (?:www\.)?yourepeat\.com|
504 tube\.majestyc\.net|
505 %(invidious)s|
506 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
507 (?:.*?\#/)? # handle anchor (#/) redirect urls
508 (?: # the various things that can precede the ID:
ac7553d0 509 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 510 |(?: # or the v= param in all its forms
f7000f3a 511 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 512 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 513 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
514 v=
515 )
f4b05232 516 ))
cbaed4bb
S
517 |(?:
518 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
519 vid\.plus| # or vid.plus/xxxx
520 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 521 %(invidious)s
cbaed4bb 522 )/
edb53e2d 523 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 524 )
c5e8d7af 525 )? # all until now is optional -> you can pass the naked ID
201c1459 526 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 527 (?(1).+)? # if we found the ID, everything can follow
9297939e 528 (?:\#|$)""" % {
bc2ca1bb 529 'invidious': '|'.join(_INVIDIOUS_SITES),
530 }
e40c758c 531 _PLAYER_INFO_RE = (
cc2db878 532 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
533 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 534 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 535 )
2c62dc26 536 _formats = {
c2d3cb4c 537 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
538 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
539 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
540 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
541 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
542 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
543 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
544 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 545 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 546 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
547 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
548 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
549 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
550 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
551 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 552 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 553 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
554 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 555
556
557 # 3D videos
c2d3cb4c 558 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
559 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
560 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
561 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 562 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
563 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
564 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 565
96fb5605 566 # Apple HTTP Live Streaming
11f12195 567 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 568 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
569 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
570 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
571 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
572 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 573 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
574 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
575
576 # DASH mp4 video
d23028a8
S
577 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
578 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
579 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
580 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
581 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 582 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
583 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
584 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
585 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
586 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
587 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
588 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 589
f6f1fc92 590 # Dash mp4 audio
d23028a8
S
591 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
592 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
593 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
594 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
595 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
596 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
597 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
598
599 # Dash webm
d23028a8
S
600 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
601 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
602 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
603 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
604 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
605 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
606 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
607 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
608 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
609 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
610 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
611 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
612 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
613 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
614 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 615 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
616 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
617 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
618 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
619 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
620 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
621 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
622
623 # Dash webm audio
d23028a8
S
624 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
625 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 626
0857baad 627 # Dash webm audio with opus inside
d23028a8
S
628 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
629 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
630 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 631
ce6b9a2d
PH
632 # RTMP (unnamed)
633 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
634
635 # av01 video only formats sometimes served with "unknown" codecs
636 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
637 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
638 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
639 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 640 }
29f7c58a 641 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 642
fd5c4aab
S
643 _GEO_BYPASS = False
644
78caa52a 645 IE_NAME = 'youtube'
2eb88d95
PH
646 _TESTS = [
647 {
2d3d2997 648 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
649 'info_dict': {
650 'id': 'BaW_jenozKc',
651 'ext': 'mp4',
3867038a 652 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
653 'uploader': 'Philipp Hagemeister',
654 'uploader_id': 'phihag',
ec85ded8 655 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
656 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
657 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 658 'upload_date': '20121002',
3867038a 659 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 660 'categories': ['Science & Technology'],
3867038a 661 'tags': ['youtube-dl'],
556dbe7f 662 'duration': 10,
dbdaaa23 663 'view_count': int,
3e7c1224
PH
664 'like_count': int,
665 'dislike_count': int,
7c80519c 666 'start_time': 1,
297a564b 667 'end_time': 9,
2eb88d95 668 }
0e853ca4 669 },
fccd3771 670 {
4bc3a23e
PH
671 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
672 'note': 'Embed-only video (#1746)',
673 'info_dict': {
674 'id': 'yZIXLfi8CZQ',
675 'ext': 'mp4',
676 'upload_date': '20120608',
677 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
678 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
679 'uploader': 'SET India',
94bfcd23 680 'uploader_id': 'setindia',
ec85ded8 681 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 682 'age_limit': 18,
545cc85d 683 },
684 'skip': 'Private video',
fccd3771 685 },
11b56058 686 {
8bdd16b4 687 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
688 'note': 'Use the first video ID in the URL',
689 'info_dict': {
690 'id': 'BaW_jenozKc',
691 'ext': 'mp4',
3867038a 692 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
693 'uploader': 'Philipp Hagemeister',
694 'uploader_id': 'phihag',
ec85ded8 695 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 696 'upload_date': '20121002',
3867038a 697 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 698 'categories': ['Science & Technology'],
3867038a 699 'tags': ['youtube-dl'],
556dbe7f 700 'duration': 10,
dbdaaa23 701 'view_count': int,
11b56058
PM
702 'like_count': int,
703 'dislike_count': int,
34a7de29
S
704 },
705 'params': {
706 'skip_download': True,
707 },
11b56058 708 },
dd27fd17 709 {
2d3d2997 710 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
711 'note': '256k DASH audio (format 141) via DASH manifest',
712 'info_dict': {
713 'id': 'a9LDPn-MO4I',
714 'ext': 'm4a',
715 'upload_date': '20121002',
716 'uploader_id': '8KVIDEO',
ec85ded8 717 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
718 'description': '',
719 'uploader': '8KVIDEO',
720 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 721 },
4bc3a23e
PH
722 'params': {
723 'youtube_include_dash_manifest': True,
724 'format': '141',
4919603f 725 },
de3c7fe0 726 'skip': 'format 141 not served anymore',
dd27fd17 727 },
8bdd16b4 728 # DASH manifest with encrypted signature
729 {
730 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
731 'info_dict': {
732 'id': 'IB3lcPjvWLA',
733 'ext': 'm4a',
734 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
735 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
736 'duration': 244,
737 'uploader': 'AfrojackVEVO',
738 'uploader_id': 'AfrojackVEVO',
739 'upload_date': '20131011',
cc2db878 740 'abr': 129.495,
8bdd16b4 741 },
742 'params': {
743 'youtube_include_dash_manifest': True,
744 'format': '141/bestaudio[ext=m4a]',
745 },
746 },
aa79ac0c
PH
747 # Controversy video
748 {
749 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
750 'info_dict': {
751 'id': 'T4XJQO3qol8',
752 'ext': 'mp4',
556dbe7f 753 'duration': 219,
aa79ac0c 754 'upload_date': '20100909',
4fe54c12 755 'uploader': 'Amazing Atheist',
aa79ac0c 756 'uploader_id': 'TheAmazingAtheist',
ec85ded8 757 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 758 'title': 'Burning Everyone\'s Koran',
545cc85d 759 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 760 }
c522adb1 761 },
dd2d55f1 762 # Normal age-gate video (embed allowed)
c522adb1 763 {
2d3d2997 764 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
765 'info_dict': {
766 'id': 'HtVdAasjOgU',
767 'ext': 'mp4',
768 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 769 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 770 'duration': 142,
c522adb1
JMF
771 'uploader': 'The Witcher',
772 'uploader_id': 'WitcherGame',
ec85ded8 773 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 774 'upload_date': '20140605',
34952f09 775 'age_limit': 18,
c522adb1
JMF
776 },
777 },
8bdd16b4 778 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
779 # YouTube Red ad is not captured for creator
780 {
781 'url': '__2ABJjxzNo',
782 'info_dict': {
783 'id': '__2ABJjxzNo',
784 'ext': 'mp4',
785 'duration': 266,
786 'upload_date': '20100430',
787 'uploader_id': 'deadmau5',
788 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 789 'creator': 'deadmau5',
790 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 791 'uploader': 'deadmau5',
792 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 793 'alt_title': 'Some Chords',
8bdd16b4 794 },
795 'expected_warnings': [
796 'DASH manifest missing',
797 ]
798 },
067aa17e 799 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
800 {
801 'url': 'lqQg6PlCWgI',
802 'info_dict': {
803 'id': 'lqQg6PlCWgI',
804 'ext': 'mp4',
556dbe7f 805 'duration': 6085,
90227264 806 'upload_date': '20150827',
cbe2bd91 807 'uploader_id': 'olympic',
ec85ded8 808 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 809 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 810 'uploader': 'Olympic',
cbe2bd91
PH
811 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
812 },
813 'params': {
814 'skip_download': 'requires avconv',
e52a40ab 815 }
cbe2bd91 816 },
6271f1ca
PH
817 # Non-square pixels
818 {
819 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
820 'info_dict': {
821 'id': '_b-2C3KPAM0',
822 'ext': 'mp4',
823 'stretched_ratio': 16 / 9.,
556dbe7f 824 'duration': 85,
6271f1ca
PH
825 'upload_date': '20110310',
826 'uploader_id': 'AllenMeow',
ec85ded8 827 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 828 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 829 'uploader': '孫ᄋᄅ',
6271f1ca
PH
830 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
831 },
06b491eb
S
832 },
833 # url_encoded_fmt_stream_map is empty string
834 {
835 'url': 'qEJwOuvDf7I',
836 'info_dict': {
837 'id': 'qEJwOuvDf7I',
f57b7835 838 'ext': 'webm',
06b491eb
S
839 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
840 'description': '',
841 'upload_date': '20150404',
842 'uploader_id': 'spbelect',
843 'uploader': 'Наблюдатели Петербурга',
844 },
845 'params': {
846 'skip_download': 'requires avconv',
e323cf3f
S
847 },
848 'skip': 'This live event has ended.',
06b491eb 849 },
067aa17e 850 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
851 {
852 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
853 'info_dict': {
854 'id': 'FIl7x6_3R5Y',
eb6793ba 855 'ext': 'webm',
da77d856
S
856 'title': 'md5:7b81415841e02ecd4313668cde88737a',
857 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 858 'duration': 220,
da77d856
S
859 'upload_date': '20150625',
860 'uploader_id': 'dorappi2000',
ec85ded8 861 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 862 'uploader': 'dorappi2000',
eb6793ba 863 'formats': 'mincount:31',
da77d856 864 },
eb6793ba 865 'skip': 'not actual anymore',
2ee8f5d8 866 },
8a1a26ce
YCH
867 # DASH manifest with segment_list
868 {
869 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
870 'md5': '8ce563a1d667b599d21064e982ab9e31',
871 'info_dict': {
872 'id': 'CsmdDsKjzN8',
873 'ext': 'mp4',
17ee98e1 874 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
875 'uploader': 'Airtek',
876 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
877 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
878 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
879 },
880 'params': {
881 'youtube_include_dash_manifest': True,
882 'format': '135', # bestvideo
be49068d
S
883 },
884 'skip': 'This live event has ended.',
2ee8f5d8 885 },
cf7e015f
S
886 {
887 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 888 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 889 'info_dict': {
545cc85d 890 'id': 'jvGDaLqkpTg',
891 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
892 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
893 },
894 'playlist': [{
895 'info_dict': {
545cc85d 896 'id': 'jvGDaLqkpTg',
cf7e015f 897 'ext': 'mp4',
545cc85d 898 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
899 'description': 'md5:e03b909557865076822aa169218d6a5d',
900 'duration': 10643,
901 'upload_date': '20161111',
902 'uploader': 'Team PGP',
903 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
904 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
905 },
906 }, {
907 'info_dict': {
545cc85d 908 'id': '3AKt1R1aDnw',
cf7e015f 909 'ext': 'mp4',
545cc85d 910 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
911 'description': 'md5:e03b909557865076822aa169218d6a5d',
912 'duration': 10991,
913 'upload_date': '20161111',
914 'uploader': 'Team PGP',
915 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
916 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
917 },
918 }, {
919 'info_dict': {
545cc85d 920 'id': 'RtAMM00gpVc',
cf7e015f 921 'ext': 'mp4',
545cc85d 922 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
923 'description': 'md5:e03b909557865076822aa169218d6a5d',
924 'duration': 10995,
925 'upload_date': '20161111',
926 'uploader': 'Team PGP',
927 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
928 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
929 },
930 }, {
931 'info_dict': {
545cc85d 932 'id': '6N2fdlP3C5U',
cf7e015f 933 'ext': 'mp4',
545cc85d 934 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
935 'description': 'md5:e03b909557865076822aa169218d6a5d',
936 'duration': 10990,
937 'upload_date': '20161111',
938 'uploader': 'Team PGP',
939 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
940 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
941 },
942 }],
943 'params': {
944 'skip_download': True,
945 },
cbaed4bb 946 },
f9f49d87 947 {
067aa17e 948 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
949 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
950 'info_dict': {
951 'id': 'gVfLd0zydlo',
952 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
953 },
954 'playlist_count': 2,
be49068d 955 'skip': 'Not multifeed anymore',
f9f49d87 956 },
cbaed4bb 957 {
2d3d2997 958 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 959 'only_matching': True,
0e49d9a6 960 },
6d4fc66b 961 {
2d3d2997 962 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
963 'only_matching': True,
964 },
0e49d9a6 965 {
067aa17e 966 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 967 # Also tests cut-off URL expansion in video description (see
067aa17e
S
968 # https://github.com/ytdl-org/youtube-dl/issues/1892,
969 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
970 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
971 'info_dict': {
972 'id': 'lsguqyKfVQg',
973 'ext': 'mp4',
974 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 975 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 976 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 977 'duration': 133,
0e49d9a6
LL
978 'upload_date': '20151119',
979 'uploader_id': 'IronSoulElf',
ec85ded8 980 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 981 'uploader': 'IronSoulElf',
eb6793ba
S
982 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
983 'track': 'Dark Walk - Position Music',
984 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 985 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
986 },
987 'params': {
988 'skip_download': True,
989 },
990 },
61f92af1 991 {
067aa17e 992 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
993 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
994 'only_matching': True,
995 },
313dfc45
LL
996 {
997 # Video with yt:stretch=17:0
998 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
999 'info_dict': {
1000 'id': 'Q39EVAstoRM',
1001 'ext': 'mp4',
1002 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1003 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1004 'upload_date': '20151107',
1005 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1006 'uploader': 'CH GAMER DROID',
1007 },
1008 'params': {
1009 'skip_download': True,
1010 },
be49068d 1011 'skip': 'This video does not exist.',
313dfc45 1012 },
201c1459 1013 {
1014 # Video with incomplete 'yt:stretch=16:'
1015 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1016 'only_matching': True,
1017 },
7caf9830
S
1018 {
1019 # Video licensed under Creative Commons
1020 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1021 'info_dict': {
1022 'id': 'M4gD1WSo5mA',
1023 'ext': 'mp4',
1024 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1025 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1026 'duration': 721,
7caf9830
S
1027 'upload_date': '20150127',
1028 'uploader_id': 'BerkmanCenter',
ec85ded8 1029 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1030 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1031 'license': 'Creative Commons Attribution license (reuse allowed)',
1032 },
1033 'params': {
1034 'skip_download': True,
1035 },
1036 },
fd050249
S
1037 {
1038 # Channel-like uploader_url
1039 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1040 'info_dict': {
1041 'id': 'eQcmzGIKrzg',
1042 'ext': 'mp4',
1043 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1044 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1045 'duration': 4060,
fd050249 1046 'upload_date': '20151119',
eb6793ba 1047 'uploader': 'Bernie Sanders',
fd050249 1048 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1049 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1050 'license': 'Creative Commons Attribution license (reuse allowed)',
1051 },
1052 'params': {
1053 'skip_download': True,
1054 },
1055 },
040ac686
S
1056 {
1057 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1058 'only_matching': True,
7f29cf54
S
1059 },
1060 {
067aa17e 1061 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1062 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1063 'only_matching': True,
6496ccb4
S
1064 },
1065 {
1066 # Rental video preview
1067 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1068 'info_dict': {
1069 'id': 'uGpuVWrhIzE',
1070 'ext': 'mp4',
1071 'title': 'Piku - Trailer',
1072 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1073 'upload_date': '20150811',
1074 'uploader': 'FlixMatrix',
1075 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1076 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1077 'license': 'Standard YouTube License',
1078 },
1079 'params': {
1080 'skip_download': True,
1081 },
eb6793ba 1082 'skip': 'This video is not available.',
022a5d66 1083 },
12afdc2a
S
1084 {
1085 # YouTube Red video with episode data
1086 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1087 'info_dict': {
1088 'id': 'iqKdEhx-dD4',
1089 'ext': 'mp4',
1090 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1091 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1092 'duration': 2085,
12afdc2a
S
1093 'upload_date': '20170118',
1094 'uploader': 'Vsauce',
1095 'uploader_id': 'Vsauce',
1096 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1097 'series': 'Mind Field',
1098 'season_number': 1,
1099 'episode_number': 1,
1100 },
1101 'params': {
1102 'skip_download': True,
1103 },
1104 'expected_warnings': [
1105 'Skipping DASH manifest',
1106 ],
1107 },
c7121fa7
S
1108 {
1109 # The following content has been identified by the YouTube community
1110 # as inappropriate or offensive to some audiences.
1111 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1112 'info_dict': {
1113 'id': '6SJNVb0GnPI',
1114 'ext': 'mp4',
1115 'title': 'Race Differences in Intelligence',
1116 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1117 'duration': 965,
1118 'upload_date': '20140124',
1119 'uploader': 'New Century Foundation',
1120 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1121 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1122 },
1123 'params': {
1124 'skip_download': True,
1125 },
545cc85d 1126 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1127 },
022a5d66
S
1128 {
1129 # itag 212
1130 'url': '1t24XAntNCY',
1131 'only_matching': True,
fd5c4aab
S
1132 },
1133 {
1134 # geo restricted to JP
1135 'url': 'sJL6WA-aGkQ',
1136 'only_matching': True,
1137 },
cd5a74a2
S
1138 {
1139 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1140 'only_matching': True,
1141 },
bc2ca1bb 1142 {
1143 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1144 'only_matching': True,
1145 },
1146 {
1147 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1148 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1149 'only_matching': True,
1150 },
825cd268
RA
1151 {
1152 # DRM protected
1153 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1154 'only_matching': True,
4fe54c12
S
1155 },
1156 {
1157 # Video with unsupported adaptive stream type formats
1158 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1159 'info_dict': {
1160 'id': 'Z4Vy8R84T1U',
1161 'ext': 'mp4',
1162 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1163 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1164 'duration': 433,
1165 'upload_date': '20130923',
1166 'uploader': 'Amelia Putri Harwita',
1167 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1168 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1169 'formats': 'maxcount:10',
1170 },
1171 'params': {
1172 'skip_download': True,
1173 'youtube_include_dash_manifest': False,
1174 },
5429d6a9 1175 'skip': 'not actual anymore',
5caabd3c 1176 },
1177 {
822b9d9c 1178 # Youtube Music Auto-generated description
5caabd3c 1179 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1180 'info_dict': {
1181 'id': 'MgNrAu2pzNs',
1182 'ext': 'mp4',
1183 'title': 'Voyeur Girl',
1184 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1185 'upload_date': '20190312',
5429d6a9
S
1186 'uploader': 'Stephen - Topic',
1187 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1188 'artist': 'Stephen',
1189 'track': 'Voyeur Girl',
1190 'album': 'it\'s too much love to know my dear',
1191 'release_date': '20190313',
1192 'release_year': 2019,
1193 },
1194 'params': {
1195 'skip_download': True,
1196 },
1197 },
66b48727
RA
1198 {
1199 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1200 'only_matching': True,
1201 },
011e75e6
S
1202 {
1203 # invalid -> valid video id redirection
1204 'url': 'DJztXj2GPfl',
1205 'info_dict': {
1206 'id': 'DJztXj2GPfk',
1207 'ext': 'mp4',
1208 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1209 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1210 'upload_date': '20090125',
1211 'uploader': 'Prochorowka',
1212 'uploader_id': 'Prochorowka',
1213 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1214 'artist': 'Panjabi MC',
1215 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1216 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1217 },
1218 'params': {
1219 'skip_download': True,
1220 },
545cc85d 1221 'skip': 'Video unavailable',
ea74e00b
DP
1222 },
1223 {
1224 # empty description results in an empty string
1225 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1226 'info_dict': {
1227 'id': 'x41yOUIvK2k',
1228 'ext': 'mp4',
1229 'title': 'IMG 3456',
1230 'description': '',
1231 'upload_date': '20170613',
1232 'uploader_id': 'ElevageOrVert',
1233 'uploader': 'ElevageOrVert',
1234 },
1235 'params': {
1236 'skip_download': True,
1237 },
1238 },
a0566bbf 1239 {
29f7c58a 1240 # with '};' inside yt initial data (see [1])
1241 # see [2] for an example with '};' inside ytInitialPlayerResponse
1242 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1243 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1244 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1245 'info_dict': {
1246 'id': 'CHqg6qOn4no',
1247 'ext': 'mp4',
1248 'title': 'Part 77 Sort a list of simple types in c#',
1249 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1250 'upload_date': '20130831',
1251 'uploader_id': 'kudvenkat',
1252 'uploader': 'kudvenkat',
1253 },
1254 'params': {
1255 'skip_download': True,
1256 },
1257 },
29f7c58a 1258 {
1259 # another example of '};' in ytInitialData
1260 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1261 'only_matching': True,
1262 },
1263 {
1264 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1265 'only_matching': True,
1266 },
545cc85d 1267 {
cc2db878 1268 # https://github.com/ytdl-org/youtube-dl/pull/28094
1269 'url': 'OtqTfy26tG0',
1270 'info_dict': {
1271 'id': 'OtqTfy26tG0',
1272 'ext': 'mp4',
1273 'title': 'Burn Out',
1274 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1275 'upload_date': '20141120',
1276 'uploader': 'The Cinematic Orchestra - Topic',
1277 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1278 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1279 'artist': 'The Cinematic Orchestra',
1280 'track': 'Burn Out',
1281 'album': 'Every Day',
1282 'release_data': None,
1283 'release_year': None,
1284 },
1285 'params': {
1286 'skip_download': True,
1287 },
545cc85d 1288 },
bc2ca1bb 1289 {
1290 # controversial video, only works with bpctr when authenticated with cookies
1291 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1292 'only_matching': True,
1293 },
f7ad7160 1294 {
1295 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1296 'url': 'cBvYw8_A0vQ',
1297 'info_dict': {
1298 'id': 'cBvYw8_A0vQ',
1299 'ext': 'mp4',
1300 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1301 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1302 'upload_date': '20201120',
1303 'uploader': 'Walk around Japan',
1304 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1305 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1306 },
1307 'params': {
1308 'skip_download': True,
1309 },
0fb983f6 1310 }, {
1311 # Has multiple audio streams
1312 'url': 'WaOKSUlf4TM',
1313 'only_matching': True
9297939e 1314 }, {
1315 # Requires Premium: has format 141 when requested using YTM url
1316 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1317 'only_matching': True
1318 }, {
120916da 1319 # multiple subtitles with same lang_code
1320 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1321 'only_matching': True,
1322 },
2eb88d95
PH
1323 ]
1324
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is decided by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    has_list = bool(list_values[0])
    return False if has_list else super(YoutubeIE, cls).suitable(url)
1334
def __init__(self, *args, **kwargs):
    """Initialise the extractor and start with empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, keyed by (player URL, signature layout id).
    self._player_cache = {}
    # Downloaded player source code, keyed by player id.
    self._code_cache = {}
e0df6211 1339
def _signature_cache_id(self, example_sig):
    """Return a string describing the layout of a signature.

    The result lists the length of every dot-separated part of
    *example_sig*, joined with dots.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1343
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id embedded in *player_url*.

    The patterns in ``cls._PLAYER_INFO_RE`` are tried in order and the
    ``id`` group of the first one that matches is returned.

    Raises ExtractorError when none of the patterns matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c
S
1353
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable applying the player's signature transformation.

    The transformation is looked up in the downloader cache first. On a
    cache miss the player code is downloaded (once per player id for this
    instance), parsed with ``_parse_sig_js``, and the resulting
    permutation is stored in the cache.

    video_id is only passed on to the download helper.
    player_url is the URL of the player JavaScript.
    example_sig is a signature whose layout selects the cache entry.

    Raises ExtractorError if the computed cache id is not a plain name.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    # The cache id must not contain any path component. This is an explicit
    # check rather than an assert so that it is still enforced when Python
    # runs with -O (which strips assert statements).
    if os.path.basename(func_id) != func_id:
        raise ExtractorError('Invalid signature cache id %r' % func_id)

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        # cache_spec lists, for each output position, the input index to take
        return lambda s: ''.join(s[i] for i in cache_spec)

    if player_id not in self._code_cache:
        self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    code = self._code_cache[player_id]
    res = self._parse_sig_js(code)

    # Run the function on a probe string whose n-th character has code
    # point n; the code points of the result are the permutation to cache.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res
1380
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    *func* is run on a probe string as long as *example_sig*; the
    resulting permutation is rendered as a sum of index and slice
    expressions on ``s`` and written out with ``to_screen``.
    """
    def gen_sig_code(idxs):
        """Yield index/slice expressions that rebuild the permutation idxs."""
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # A slice that runs down to index 0 must leave the end open
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        if len(idxs) < 2:
            # With fewer than two indices the pairwise loop below never
            # runs and ``i`` would be unbound afterwards.
            for idx in idxs:
                yield 's[%d]' % idx
            return

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Probe string: the n-th character has code point n
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1419
def _parse_sig_js(self, jscode):
    """Return a callable wrapping the signature function found in *jscode*.

    The function name is located with the patterns below, the function is
    extracted with JSInterpreter, and the returned callable takes the
    signature string as its only argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # This pattern used to be listed twice; an identical second copy
         # can never match where the first one did not.
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
1443
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    The signature function is obtained once per (player URL, signature
    layout) pair and kept in ``self._player_cache``.

    Raises ExtractorError when player_url is None or when anything goes
    wrong while obtaining or applying the signature function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Turn protocol-relative and site-relative player URLs into absolute ones
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif re.match(r'https?://', player_url) is None:
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        # Any failure is reported together with its full traceback
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1470
def _mark_watched(self, video_id, player_response):
    """Request the playback tracking URL found in *player_response*.

    Does nothing when the response carries no valid
    ``videostatsPlaybackUrl``. The request is non-fatal: a failure is
    reported by the download helper instead of being raised.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # Pick 16 characters uniformly. The previous ``randint(0, 256) & 63``
    # drew from 257 values (randint is inclusive), which made the first
    # character of the alphabet slightly more likely than the others.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1495
@staticmethod
def _extract_urls(webpage):
    """Return all YouTube embeds found in the HTML of *webpage*.

    The result is a list of strings collected from three sources, in this
    order: URLs of embedded players, ids from lazyYT elements, and ids from
    the Wordpress "YouTube Video Importer" plugin markup.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read ``embedSWF\(?:\s*`` (an
    # optional "(" followed by a literal ":"), which cannot match a call
    # such as embedSWF("//www.youtube.com/v/...").
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id) for youtube_id in
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the video id is its last group
    entries.extend(m[-1] for m in matches)

    return entries
1527
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``YoutubeIE._extract_urls``, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1532
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second capture group of ``cls._VALID_URL`` matched against url.

    Raises ExtractorError when the URL does not match the pattern.
    """
    # _VALID_URL is matched with re.VERBOSE, as in the rest of this method's contract
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1540
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a chapter list from the chaptered player bar in ``data``.

    Each chapter ends where the following one starts; the last chapter ends
    at ``duration``. Chapters whose start or end cannot be determined are
    left out. Returns None when ``data`` holds no chapter list.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_seconds(item):
        # The renderer stores the start in milliseconds; scale it to seconds
        millis = try_get(
            item, lambda x: x['chapterRenderer']['timeRangeStartMillis'], int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for index, item in enumerate(raw_chapters):
        begin = start_seconds(item)
        if begin is None:
            continue
        following = index + 1
        if following < total:
            finish = start_seconds(raw_chapters[following])
        else:
            finish = duration
        if finish is None:
            continue
        chapter_title = try_get(
            item, lambda x: x['chapterRenderer']['title']['simpleText'],
            compat_str)
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': chapter_title,
        })
    return result
1580
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search the webpage for a JSON blob with ``regex`` and parse it.

    The pattern anchored by ``_YT_INITIAL_BOUNDARY_RE`` is tried first, then
    the bare ``regex``. When neither matches, the string '{}' is parsed
    instead. Parsing is non-fatal.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1585
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns whatever datetime_from_str produces for 'now-<X><units>', or
    None when time_text is empty/missing or has fewer than three
    space-separated words.
    """
    # The joined text is None when a comment carries no time runs;
    # bail out instead of failing on None.split()
    if not time_text:
        return None
    time_text_split = time_text.split(' ')
    if len(time_text_split) < 3:
        return None
    return datetime_from_str(
        'now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1595
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the 'text' values of the dict entries in ``runs``.

    Entries that are not dicts, or whose 'text' is missing, empty or not a
    string, are ignored. Returns None when nothing was collected.
    """
    pieces = []
    for run in runs:
        if isinstance(run, dict):
            piece = try_get(run, lambda x: x['text'], compat_str)
            if piece:
                pieces.append(piece)
    return ''.join(pieces) if pieces else None
1609
def _extract_comment(self, comment_renderer, parent=None):
    """Turn a comment renderer dict into a flat comment dict.

    Returns None when the renderer has no 'commentId'. 'parent' is the id
    of the comment being replied to; top-level comments get 'root'.
    'timestamp' is None when the published-time text is missing or cannot
    be split into 'X units ago'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)

    # time_text is None when there are no time runs, and parse_time_text
    # returns None for text with fewer than three words; in both cases
    # leave the timestamp unset instead of crashing on None.timetuple()
    timestamp = None
    if time_text:
        time_text_dt = self.parse_time_text(time_text)
        if time_text_dt is not None:
            timestamp = calendar.timegm(time_text_dt.timetuple())

    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                  lambda x: x['likeCount']), compat_str)) or 0
    # The last thumbnail in the list is the one that is used
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_liked,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
1642
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generator over the comments reachable from a continuation.

    Yields comment dicts as produced by _extract_comment. For the top-level
    call (parent is None) it may additionally yield one int: the expected
    total number of comments taken from the comments header.

    session_token_list is a one-element list so that a refreshed xsrf token
    is shared with the recursive calls made for reply threads.
    comment_counts is a shared three-element list:
    [comments so far, estimated total, current reply thread number].
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` must be the expected type, not a member of the getter
            # tuple: as a getter it would turn any dict lacking the key
            # into a (bogus) comment renderer and skip the type check.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self.get_param('extractor_retries', 3)
        count = -1
        last_error = None

        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                raise
            else:
                # Keep the shared token up to date for subsequent requests
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    # Guard against a non-numeric count text: formatting
                    # None with %d would raise TypeError here and in the
                    # progress string of later pages.
                    expected_total = str_to_int(expected_comment_count)
                    if expected_total is not None:
                        comment_counts[1] = expected_total
                        self.to_screen('Downloading ~%d comments' % expected_total)
                        yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    # Restart from the sorted continuation without consuming this page
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
        else:
            # No known continuation renderer in the response: `continuation`
            # would stay unchanged and the same page would be requested
            # again and again, so stop here.
            break
1827
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
    """Entry for comment extraction"""
    known_entry_comment_renderers = (
        'itemSectionRenderer',
    )
    collected = []
    expected_total = 0
    for entry in contents:
        # Only the first known renderer of each entry is processed
        matching = [
            renderer for key, renderer in entry.items()
            if key in known_entry_comment_renderers]
        if not matching:
            continue

        comment_iter = self._comment_entries(
            matching[0],
            identity_token=self._extract_identity_token(webpage, item_id=video_id),
            account_syncid=self._extract_account_syncid(ytcfg),
            ytcfg=ytcfg,
            session_token_list=[xsrf_token])

        for item in comment_iter:
            # An int from the generator is the estimated total, not a comment
            if isinstance(item, int):
                expected_total = item
            else:
                collected.append(item)

    self.to_screen('Downloaded %d/%d comments' % (len(collected), expected_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
1858
c5e8d7af 1859 def _real_extract(self, url):
cf7e015f 1860 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1861 video_id = self._match_id(url)
9297939e 1862
1863 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
1864
545cc85d 1865 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1866 webpage_url = base_url + 'watch?v=' + video_id
1867 webpage = self._download_webpage(
cce889b9 1868 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1869
9297939e 1870 def get_text(x):
1871 if not x:
1872 return
1873 text = x.get('simpleText')
1874 if text and isinstance(text, compat_str):
1875 return text
1876 runs = x.get('runs')
1877 if not isinstance(runs, list):
1878 return
1879 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
1880
1881 ytm_streaming_data = {}
1882 if is_music_url:
1883 # we are forcing to use parse_json because 141 only appeared in get_video_info.
1884 # el, c, cver, cplayer field required for 141(aac 256kbps) codec
1885 # maybe paramter of youtube music player?
1886 ytm_player_response = self._parse_json(try_get(compat_parse_qs(
1887 self._download_webpage(
1888 base_url + 'get_video_info', video_id,
fe03a6cd 1889 'Fetching youtube music info webpage',
1890 'unable to download youtube music info webpage', query={
9297939e 1891 'video_id': video_id,
1892 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1893 'el': 'detailpage',
1894 'c': 'WEB_REMIX',
1895 'cver': '0.1',
1896 'cplayer': 'UNIPLAYER'
1897 }, fatal=False)),
1898 lambda x: x['player_response'][0],
1899 compat_str) or '{}', video_id)
1900 ytm_streaming_data = ytm_player_response.get('streamingData') or {}
1901
545cc85d 1902 player_response = None
1903 if webpage:
1904 player_response = self._extract_yt_initial_variable(
1905 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1906 video_id, 'initial player response')
f4f751af 1907
1908 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1909 if not player_response:
1910 player_response = self._call_api(
f4f751af 1911 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1912
1913 playability_status = player_response.get('playabilityStatus') or {}
1914 if playability_status.get('reason') == 'Sign in to confirm your age':
1915 pr = self._parse_json(try_get(compat_parse_qs(
1916 self._download_webpage(
1917 base_url + 'get_video_info', video_id,
1918 'Refetching age-gated info webpage',
1919 'unable to download video info webpage', query={
1920 'video_id': video_id,
7c60c33e 1921 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1922 }, fatal=False)),
1923 lambda x: x['player_response'][0],
1924 compat_str) or '{}', video_id)
1925 if pr:
1926 player_response = pr
1927
1928 trailer_video_id = try_get(
1929 playability_status,
1930 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1931 compat_str)
1932 if trailer_video_id:
1933 return self.url_result(
1934 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1935
545cc85d 1936 search_meta = (
1937 lambda x: self._html_search_meta(x, webpage, default=None)) \
1938 if webpage else lambda x: None
dbdaaa23 1939
545cc85d 1940 video_details = player_response.get('videoDetails') or {}
37357d21 1941 microformat = try_get(
545cc85d 1942 player_response,
1943 lambda x: x['microformat']['playerMicroformatRenderer'],
1944 dict) or {}
1945 video_title = video_details.get('title') \
1946 or get_text(microformat.get('title')) \
1947 or search_meta(['og:title', 'twitter:title', 'title'])
1948 video_description = video_details.get('shortDescription')
cf7e015f 1949
8fe10494 1950 if not smuggled_data.get('force_singlefeed', False):
a06916d9 1951 if not self.get_param('noplaylist'):
8fe10494
S
1952 multifeed_metadata_list = try_get(
1953 player_response,
1954 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1955 compat_str)
8fe10494
S
1956 if multifeed_metadata_list:
1957 entries = []
1958 feed_ids = []
1959 for feed in multifeed_metadata_list.split(','):
1960 # Unquote should take place before split on comma (,) since textual
1961 # fields may contain comma as well (see
067aa17e 1962 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1963 feed_data = compat_parse_qs(
1964 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1965
1966 def feed_entry(name):
545cc85d 1967 return try_get(
1968 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1969
1970 feed_id = feed_entry('id')
1971 if not feed_id:
1972 continue
1973 feed_title = feed_entry('title')
1974 title = video_title
1975 if feed_title:
1976 title += ' (%s)' % feed_title
8fe10494
S
1977 entries.append({
1978 '_type': 'url_transparent',
1979 'ie_key': 'Youtube',
1980 'url': smuggle_url(
545cc85d 1981 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1982 {'force_singlefeed': True}),
6b09401b 1983 'title': title,
8fe10494 1984 })
6b09401b 1985 feed_ids.append(feed_id)
8fe10494
S
1986 self.to_screen(
1987 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1988 % (', '.join(feed_ids), video_id))
545cc85d 1989 return self.playlist_result(
1990 entries, video_id, video_title, video_description)
8fe10494
S
1991 else:
1992 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1993
9297939e 1994 formats, itags, stream_ids = [], [], []
cc2db878 1995 itag_qualities = {}
545cc85d 1996 player_url = None
dca3ff4a 1997 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
9297939e 1998
545cc85d 1999 streaming_data = player_response.get('streamingData') or {}
2000 streaming_formats = streaming_data.get('formats') or []
2001 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2002 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2003 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2004
545cc85d 2005 for fmt in streaming_formats:
2006 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2007 continue
321bf820 2008
cc2db878 2009 itag = str_or_none(fmt.get('itag'))
9297939e 2010 audio_track = fmt.get('audioTrack') or {}
2011 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2012 if stream_id in stream_ids:
2013 continue
2014
cc2db878 2015 quality = fmt.get('quality')
2016 if itag and quality:
2017 itag_qualities[itag] = quality
2018 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2019 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2020 # number of fragment that would subsequently requested with (`&sq=N`)
2021 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2022 continue
2023
545cc85d 2024 fmt_url = fmt.get('url')
2025 if not fmt_url:
2026 sc = compat_parse_qs(fmt.get('signatureCipher'))
2027 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2028 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2029 if not (sc and fmt_url and encrypted_sig):
2030 continue
2031 if not player_url:
2032 if not webpage:
2033 continue
2034 player_url = self._search_regex(
2035 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
2036 webpage, 'player URL', fatal=False)
2037 if not player_url:
201e9eaa 2038 continue
545cc85d 2039 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2040 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2041 fmt_url += '&' + sp + '=' + signature
2042
545cc85d 2043 if itag:
2044 itags.append(itag)
9297939e 2045 stream_ids.append(stream_id)
2046
cc2db878 2047 tbr = float_or_none(
2048 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2049 dct = {
2050 'asr': int_or_none(fmt.get('audioSampleRate')),
2051 'filesize': int_or_none(fmt.get('contentLength')),
2052 'format_id': itag,
0fb983f6 2053 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2054 'fps': int_or_none(fmt.get('fps')),
2055 'height': int_or_none(fmt.get('height')),
dca3ff4a 2056 'quality': q(quality),
cc2db878 2057 'tbr': tbr,
545cc85d 2058 'url': fmt_url,
2059 'width': fmt.get('width'),
0fb983f6 2060 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2061 }
2062 mimetype = fmt.get('mimeType')
2063 if mimetype:
2064 mobj = re.match(
2065 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2066 if mobj:
2067 dct['ext'] = mimetype2ext(mobj.group(1))
2068 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2069 no_audio = dct.get('acodec') == 'none'
2070 no_video = dct.get('vcodec') == 'none'
2071 if no_audio:
2072 dct['vbr'] = tbr
2073 if no_video:
2074 dct['abr'] = tbr
2075 if no_audio or no_video:
545cc85d 2076 dct['downloader_options'] = {
2077 # Youtube throttles chunks >~10M
2078 'http_chunk_size': 10485760,
bf1317d2 2079 }
7c60c33e 2080 if dct.get('ext'):
2081 dct['container'] = dct['ext'] + '_dash'
545cc85d 2082 formats.append(dct)
2083
9297939e 2084 for sd in (streaming_data, ytm_streaming_data):
2085 hls_manifest_url = sd.get('hlsManifestUrl')
2086 if hls_manifest_url:
2087 for f in self._extract_m3u8_formats(
2088 hls_manifest_url, video_id, 'mp4', fatal=False):
2089 itag = self._search_regex(
2090 r'/itag/(\d+)', f['url'], 'itag', default=None)
2091 if itag:
2092 f['format_id'] = itag
545cc85d 2093 formats.append(f)
2094
a06916d9 2095 if self.get_param('youtube_include_dash_manifest', True):
9297939e 2096 for sd in (streaming_data, ytm_streaming_data):
2097 dash_manifest_url = sd.get('dashManifestUrl')
2098 if dash_manifest_url:
2099 for f in self._extract_mpd_formats(
2100 dash_manifest_url, video_id, fatal=False):
2101 itag = f['format_id']
2102 if itag in itags:
2103 continue
2104 if itag in itag_qualities:
2105 # Not actually usefull since the sorting is already done with "quality,res,fps,codec"
2106 # but kept to maintain feature parity (and code similarity) with youtube-dl
2107 # Remove if this causes any issues with sorting in future
2108 f['quality'] = q(itag_qualities[itag])
2109 filesize = int_or_none(self._search_regex(
2110 r'/clen/(\d+)', f.get('fragment_base_url')
2111 or f['url'], 'file size', default=None))
2112 if filesize:
2113 f['filesize'] = filesize
2114 formats.append(f)
bf1317d2 2115
545cc85d 2116 if not formats:
a06916d9 2117 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2118 self.raise_no_formats(
545cc85d 2119 'This video is DRM protected.', expected=True)
2120 pemr = try_get(
2121 playability_status,
2122 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2123 dict) or {}
2124 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2125 subreason = pemr.get('subreason')
2126 if subreason:
2127 subreason = clean_html(get_text(subreason))
2128 if subreason == 'The uploader has not made this video available in your country.':
2129 countries = microformat.get('availableCountries')
2130 if not countries:
2131 regions_allowed = search_meta('regionsAllowed')
2132 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2133 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2134 reason += '\n' + subreason
2135 if reason:
b7da73eb 2136 self.raise_no_formats(reason, expected=True)
bf1317d2 2137
545cc85d 2138 self._sort_formats(formats)
bf1317d2 2139
545cc85d 2140 keywords = video_details.get('keywords') or []
2141 if not keywords and webpage:
2142 keywords = [
2143 unescapeHTML(m.group('content'))
2144 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2145 for keyword in keywords:
2146 if keyword.startswith('yt:stretch='):
201c1459 2147 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2148 if mobj:
2149 # NB: float is intentional for forcing float division
2150 w, h = (float(v) for v in mobj.groups())
2151 if w > 0 and h > 0:
2152 ratio = w / h
2153 for f in formats:
2154 if f.get('vcodec') != 'none':
2155 f['stretched_ratio'] = ratio
2156 break
6449cd80 2157
545cc85d 2158 thumbnails = []
2159 for container in (video_details, microformat):
2160 for thumbnail in (try_get(
2161 container,
2162 lambda x: x['thumbnail']['thumbnails'], list) or []):
2163 thumbnail_url = thumbnail.get('url')
2164 if not thumbnail_url:
bf1317d2 2165 continue
1988fab7 2166 # Sometimes youtube gives a wrong thumbnail URL. See:
2167 # https://github.com/yt-dlp/yt-dlp/issues/233
2168 # https://github.com/ytdl-org/youtube-dl/issues/28023
2169 if 'maxresdefault' in thumbnail_url:
2170 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2171 thumbnails.append({
2172 'height': int_or_none(thumbnail.get('height')),
2173 'url': thumbnail_url,
2174 'width': int_or_none(thumbnail.get('width')),
2175 })
2176 if thumbnails:
2177 break
a6211d23 2178 else:
545cc85d 2179 thumbnail = search_meta(['og:image', 'twitter:image'])
2180 if thumbnail:
2181 thumbnails = [{'url': thumbnail}]
2182
2183 category = microformat.get('category') or search_meta('genre')
2184 channel_id = video_details.get('channelId') \
2185 or microformat.get('externalChannelId') \
2186 or search_meta('channelId')
2187 duration = int_or_none(
2188 video_details.get('lengthSeconds')
2189 or microformat.get('lengthSeconds')) \
2190 or parse_duration(search_meta('duration'))
2191 is_live = video_details.get('isLive')
2192 owner_profile_url = microformat.get('ownerProfileUrl')
2193
2194 info = {
2195 'id': video_id,
2196 'title': self._live_title(video_title) if is_live else video_title,
2197 'formats': formats,
2198 'thumbnails': thumbnails,
2199 'description': video_description,
2200 'upload_date': unified_strdate(
2201 microformat.get('uploadDate')
2202 or search_meta('uploadDate')),
2203 'uploader': video_details['author'],
2204 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2205 'uploader_url': owner_profile_url,
2206 'channel_id': channel_id,
2207 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2208 'duration': duration,
2209 'view_count': int_or_none(
2210 video_details.get('viewCount')
2211 or microformat.get('viewCount')
2212 or search_meta('interactionCount')),
2213 'average_rating': float_or_none(video_details.get('averageRating')),
2214 'age_limit': 18 if (
2215 microformat.get('isFamilySafe') is False
2216 or search_meta('isFamilyFriendly') == 'false'
2217 or search_meta('og:restrictions:age') == '18+') else 0,
2218 'webpage_url': webpage_url,
2219 'categories': [category] if category else None,
2220 'tags': keywords,
2221 'is_live': is_live,
2222 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2223 'was_live': video_details.get('isLiveContent'),
545cc85d 2224 }
b477fc13 2225
545cc85d 2226 pctr = try_get(
2227 player_response,
2228 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2229 subtitles = {}
2230 if pctr:
774d79cc 2231 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2232 lang_subs = container.setdefault(lang_code, [])
545cc85d 2233 for fmt in self._SUBTITLE_FORMATS:
2234 query.update({
2235 'fmt': fmt,
2236 })
2237 lang_subs.append({
2238 'ext': fmt,
2239 'url': update_url_query(base_url, query),
774d79cc 2240 'name': sub_name,
545cc85d 2241 })
7e72694b 2242
545cc85d 2243 for caption_track in (pctr.get('captionTracks') or []):
2244 base_url = caption_track.get('baseUrl')
2245 if not base_url:
2246 continue
2247 if caption_track.get('kind') != 'asr':
120916da 2248 lang_code = (
2249 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2250 or caption_track.get('languageCode'))
545cc85d 2251 if not lang_code:
2252 continue
2253 process_language(
774d79cc 2254 subtitles, base_url, lang_code,
2255 try_get(caption_track, lambda x: x.get('name').get('simpleText')),
2256 {})
545cc85d 2257 continue
2258 automatic_captions = {}
2259 for translation_language in (pctr.get('translationLanguages') or []):
2260 translation_language_code = translation_language.get('languageCode')
2261 if not translation_language_code:
2262 continue
2263 process_language(
2264 automatic_captions, base_url, translation_language_code,
774d79cc 2265 try_get(translation_language, lambda x: x['languageName']['simpleText']),
545cc85d 2266 {'tlang': translation_language_code})
2267 info['automatic_captions'] = automatic_captions
2268 info['subtitles'] = subtitles
7e72694b 2269
545cc85d 2270 parsed_url = compat_urllib_parse_urlparse(url)
2271 for component in [parsed_url.fragment, parsed_url.query]:
2272 query = compat_parse_qs(component)
2273 for k, v in query.items():
2274 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2275 d_k += '_time'
2276 if d_k not in info and k in s_ks:
2277 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2278
2279 # Youtube Music Auto-generated description
822b9d9c 2280 if video_description:
38d70284 2281 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2282 if mobj:
822b9d9c
RA
2283 release_year = mobj.group('release_year')
2284 release_date = mobj.group('release_date')
2285 if release_date:
2286 release_date = release_date.replace('-', '')
2287 if not release_year:
545cc85d 2288 release_year = release_date[:4]
2289 info.update({
2290 'album': mobj.group('album'.strip()),
2291 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2292 'track': mobj.group('track').strip(),
2293 'release_date': release_date,
cc2db878 2294 'release_year': int_or_none(release_year),
545cc85d 2295 })
7e72694b 2296
545cc85d 2297 initial_data = None
2298 if webpage:
2299 initial_data = self._extract_yt_initial_variable(
2300 webpage, self._YT_INITIAL_DATA_RE, video_id,
2301 'yt initial data')
2302 if not initial_data:
2303 initial_data = self._call_api(
f4f751af 2304 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2305
2306 if not is_live:
2307 try:
2308 # This will error if there is no livechat
2309 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2310 info['subtitles']['live_chat'] = [{
394dcd44 2311 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2312 'video_id': video_id,
2313 'ext': 'json',
2314 'protocol': 'youtube_live_chat_replay',
2315 }]
2316 except (KeyError, IndexError, TypeError):
2317 pass
2318
2319 if initial_data:
2320 chapters = self._extract_chapters_from_json(
2321 initial_data, video_id, duration)
2322 if not chapters:
2323 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2324 contents = try_get(
2325 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2326 list)
2327 if not contents:
2328 continue
2329
2330 def chapter_time(mmlir):
2331 return parse_duration(
2332 get_text(mmlir.get('timeDescription')))
2333
2334 chapters = []
2335 for next_num, content in enumerate(contents, start=1):
2336 mmlir = content.get('macroMarkersListItemRenderer') or {}
2337 start_time = chapter_time(mmlir)
2338 end_time = chapter_time(try_get(
2339 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2340 if next_num < len(contents) else duration
2341 if start_time is None or end_time is None:
2342 continue
2343 chapters.append({
2344 'start_time': start_time,
2345 'end_time': end_time,
2346 'title': get_text(mmlir.get('title')),
2347 })
2348 if chapters:
2349 break
2350 if chapters:
2351 info['chapters'] = chapters
2352
2353 contents = try_get(
2354 initial_data,
2355 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2356 list) or []
2357 for content in contents:
2358 vpir = content.get('videoPrimaryInfoRenderer')
2359 if vpir:
2360 stl = vpir.get('superTitleLink')
2361 if stl:
2362 stl = get_text(stl)
2363 if try_get(
2364 vpir,
2365 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2366 info['location'] = stl
2367 else:
2368 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2369 if mobj:
2370 info.update({
2371 'series': mobj.group(1),
2372 'season_number': int(mobj.group(2)),
2373 'episode_number': int(mobj.group(3)),
2374 })
2375 for tlb in (try_get(
2376 vpir,
2377 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2378 list) or []):
2379 tbr = tlb.get('toggleButtonRenderer') or {}
2380 for getter, regex in [(
2381 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2382 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2383 lambda x: x['accessibility'],
2384 lambda x: x['accessibilityData']['accessibilityData'],
2385 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2386 label = (try_get(tbr, getter, dict) or {}).get('label')
2387 if label:
2388 mobj = re.match(regex, label)
2389 if mobj:
2390 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2391 break
2392 sbr_tooltip = try_get(
2393 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2394 if sbr_tooltip:
2395 like_count, dislike_count = sbr_tooltip.split(' / ')
2396 info.update({
2397 'like_count': str_to_int(like_count),
2398 'dislike_count': str_to_int(dislike_count),
2399 })
2400 vsir = content.get('videoSecondaryInfoRenderer')
2401 if vsir:
2402 info['channel'] = get_text(try_get(
2403 vsir,
2404 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2405 dict))
545cc85d 2406 rows = try_get(
2407 vsir,
2408 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2409 list) or []
2410 multiple_songs = False
2411 for row in rows:
2412 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2413 multiple_songs = True
2414 break
2415 for row in rows:
2416 mrr = row.get('metadataRowRenderer') or {}
2417 mrr_title = mrr.get('title')
2418 if not mrr_title:
2419 continue
2420 mrr_title = get_text(mrr['title'])
2421 mrr_contents_text = get_text(mrr['contents'][0])
2422 if mrr_title == 'License':
2423 info['license'] = mrr_contents_text
2424 elif not multiple_songs:
2425 if mrr_title == 'Album':
2426 info['album'] = mrr_contents_text
2427 elif mrr_title == 'Artist':
2428 info['artist'] = mrr_contents_text
2429 elif mrr_title == 'Song':
2430 info['track'] = mrr_contents_text
2431
2432 fallbacks = {
2433 'channel': 'uploader',
2434 'channel_id': 'uploader_id',
2435 'channel_url': 'uploader_url',
2436 }
2437 for to, frm in fallbacks.items():
2438 if not info.get(to):
2439 info[to] = info.get(frm)
2440
2441 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2442 v = info.get(s_k)
2443 if v:
2444 info[d_k] = v
b84071c0 2445
c224251a
M
2446 is_private = bool_or_none(video_details.get('isPrivate'))
2447 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2448 is_membersonly = None
b28f8d24 2449 is_premium = None
c224251a
M
2450 if initial_data and is_private is not None:
2451 is_membersonly = False
b28f8d24 2452 is_premium = False
c224251a
M
2453 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2454 for content in contents or []:
2455 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2456 for badge in badges or []:
2457 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2458 if label.lower() == 'members only':
2459 is_membersonly = True
2460 break
b28f8d24
M
2461 elif label.lower() == 'premium':
2462 is_premium = True
2463 break
2464 if is_membersonly or is_premium:
c224251a
M
2465 break
2466
2467 # TODO: Add this for playlists
2468 info['availability'] = self._availability(
2469 is_private=is_private,
b28f8d24 2470 needs_premium=is_premium,
c224251a
M
2471 needs_subscription=is_membersonly,
2472 needs_auth=info['age_limit'] >= 18,
2473 is_unlisted=None if is_private is None else is_unlisted)
2474
06167fbb 2475 # get xsrf for annotations or comments
a06916d9 2476 get_annotations = self.get_param('writeannotations', False)
2477 get_comments = self.get_param('getcomments', False)
06167fbb 2478 if get_annotations or get_comments:
29f7c58a 2479 xsrf_token = None
545cc85d 2480 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2481 if ytcfg:
2482 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2483 if not xsrf_token:
2484 xsrf_token = self._search_regex(
2485 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2486 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2487
2488 # annotations
06167fbb 2489 if get_annotations:
64b6a4e9
RA
2490 invideo_url = try_get(
2491 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2492 if xsrf_token and invideo_url:
29f7c58a 2493 xsrf_field_name = None
2494 if ytcfg:
2495 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2496 if not xsrf_field_name:
2497 xsrf_field_name = self._search_regex(
2498 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2499 webpage, 'xsrf field name',
29f7c58a 2500 group='xsrf_field_name', default='session_token')
8a784c74 2501 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2502 self._proto_relative_url(invideo_url),
2503 video_id, note='Downloading annotations',
2504 errnote='Unable to download video annotations', fatal=False,
2505 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2506
277d6ff5 2507 if get_comments:
a1c5d2ca 2508 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2509
545cc85d 2510 self.mark_watched(video_id, player_response)
d77ab8e2 2511
545cc85d 2512 return info
c5e8d7af 2513
5f6a1245 2514
8bdd16b4 2515class YoutubeTabIE(YoutubeBaseInfoExtractor):
2516 IE_DESC = 'YouTube.com tab'
70d5c17b 2517 _VALID_URL = r'''(?x)
2518 https?://
2519 (?:\w+\.)?
2520 (?:
2521 youtube(?:kids)?\.com|
2522 invidio\.us
2523 )/
2524 (?:
fe03a6cd 2525 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 2526 (?P<not_channel>
9ba5705a 2527 feed/|hashtag/|
70d5c17b 2528 (?:playlist|watch)\?.*?\blist=
2529 )|
29f7c58a 2530 (?!(?:%s)\b) # Direct URLs
70d5c17b 2531 )
2532 (?P<id>[^/?\#&]+)
2533 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2534 IE_NAME = 'youtube:tab'
2535
81127aa5 2536 _TESTS = [{
da692b79 2537 'note': 'playlists, multipage',
8bdd16b4 2538 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2539 'playlist_mincount': 94,
2540 'info_dict': {
2541 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2542 'title': 'Игорь Клейнер - Playlists',
2543 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2544 'uploader': 'Игорь Клейнер',
2545 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2546 },
2547 }, {
da692b79 2548 'note': 'playlists, multipage, different order',
8bdd16b4 2549 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2550 'playlist_mincount': 94,
2551 'info_dict': {
2552 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2553 'title': 'Игорь Клейнер - Playlists',
2554 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2555 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2556 'uploader': 'Игорь Клейнер',
8bdd16b4 2557 },
201c1459 2558 }, {
da692b79 2559 'note': 'playlists, series',
201c1459 2560 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2561 'playlist_mincount': 5,
2562 'info_dict': {
2563 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2564 'title': '3Blue1Brown - Playlists',
2565 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 2566 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2567 'uploader': '3Blue1Brown',
201c1459 2568 },
8bdd16b4 2569 }, {
da692b79 2570 'note': 'playlists, singlepage',
8bdd16b4 2571 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2572 'playlist_mincount': 4,
2573 'info_dict': {
2574 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2575 'title': 'ThirstForScience - Playlists',
2576 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2577 'uploader': 'ThirstForScience',
2578 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2579 }
2580 }, {
2581 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2582 'only_matching': True,
2583 }, {
da692b79 2584 'note': 'basic, single video playlist',
0e30a7b9 2585 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2586 'info_dict': {
0e30a7b9 2587 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2588 'uploader': 'Sergey M.',
2589 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2590 'title': 'youtube-dl public playlist',
81127aa5 2591 },
0e30a7b9 2592 'playlist_count': 1,
9291475f 2593 }, {
da692b79 2594 'note': 'empty playlist',
0e30a7b9 2595 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2596 'info_dict': {
0e30a7b9 2597 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2598 'uploader': 'Sergey M.',
2599 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2600 'title': 'youtube-dl empty playlist',
9291475f
PH
2601 },
2602 'playlist_count': 0,
2603 }, {
da692b79 2604 'note': 'Home tab',
8bdd16b4 2605 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2606 'info_dict': {
8bdd16b4 2607 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2608 'title': 'lex will - Home',
2609 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2610 'uploader': 'lex will',
2611 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2612 },
8bdd16b4 2613 'playlist_mincount': 2,
9291475f 2614 }, {
da692b79 2615 'note': 'Videos tab',
8bdd16b4 2616 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2617 'info_dict': {
8bdd16b4 2618 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2619 'title': 'lex will - Videos',
2620 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2621 'uploader': 'lex will',
2622 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2623 },
8bdd16b4 2624 'playlist_mincount': 975,
9291475f 2625 }, {
da692b79 2626 'note': 'Videos tab, sorted by popular',
8bdd16b4 2627 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2628 'info_dict': {
8bdd16b4 2629 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2630 'title': 'lex will - Videos',
2631 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2632 'uploader': 'lex will',
2633 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2634 },
8bdd16b4 2635 'playlist_mincount': 199,
9291475f 2636 }, {
da692b79 2637 'note': 'Playlists tab',
8bdd16b4 2638 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2639 'info_dict': {
8bdd16b4 2640 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2641 'title': 'lex will - Playlists',
2642 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2643 'uploader': 'lex will',
2644 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2645 },
8bdd16b4 2646 'playlist_mincount': 17,
ac7553d0 2647 }, {
da692b79 2648 'note': 'Community tab',
8bdd16b4 2649 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2650 'info_dict': {
8bdd16b4 2651 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2652 'title': 'lex will - Community',
2653 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2654 'uploader': 'lex will',
2655 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2656 },
2657 'playlist_mincount': 18,
87dadd45 2658 }, {
da692b79 2659 'note': 'Channels tab',
8bdd16b4 2660 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2661 'info_dict': {
8bdd16b4 2662 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2663 'title': 'lex will - Channels',
2664 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2665 'uploader': 'lex will',
2666 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2667 },
deaec5af 2668 'playlist_mincount': 12,
cd684175 2669 }, {
2670 'note': 'Search tab',
2671 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
2672 'playlist_mincount': 40,
2673 'info_dict': {
2674 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2675 'title': '3Blue1Brown - Search - linear algebra',
2676 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2677 'uploader': '3Blue1Brown',
2678 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2679 },
6b08cdf6 2680 }, {
a0566bbf 2681 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2682 'only_matching': True,
2683 }, {
a0566bbf 2684 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2685 'only_matching': True,
2686 }, {
a0566bbf 2687 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2688 'only_matching': True,
2689 }, {
2690 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2691 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2692 'info_dict': {
2693 'title': '29C3: Not my department',
2694 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2695 'uploader': 'Christiaan008',
2696 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2697 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2698 },
2699 'playlist_count': 96,
2700 }, {
2701 'note': 'Large playlist',
2702 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2703 'info_dict': {
8bdd16b4 2704 'title': 'Uploads from Cauchemar',
2705 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2706 'uploader': 'Cauchemar',
2707 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2708 },
8bdd16b4 2709 'playlist_mincount': 1123,
2710 }, {
da692b79 2711 'note': 'even larger playlist, 8832 videos',
8bdd16b4 2712 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2713 'only_matching': True,
4b7df0d3
JMF
2714 }, {
2715 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2716 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2717 'info_dict': {
acf757f4
PH
2718 'title': 'Uploads from Interstellar Movie',
2719 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2720 'uploader': 'Interstellar Movie',
8bdd16b4 2721 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2722 },
481cc733 2723 'playlist_mincount': 21,
358de58c 2724 }, {
2725 'note': 'Playlist with "show unavailable videos" button',
2726 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2727 'info_dict': {
2728 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2729 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2730 'uploader': 'Phim Siêu Nhân Nhật Bản',
2731 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2732 },
da692b79 2733 'playlist_mincount': 200,
5d342002 2734 }, {
da692b79 2735 'note': 'Playlist with unavailable videos in page 7',
5d342002 2736 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2737 'info_dict': {
2738 'title': 'Uploads from BlankTV',
2739 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2740 'uploader': 'BlankTV',
2741 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2742 },
da692b79 2743 'playlist_mincount': 1000,
8bdd16b4 2744 }, {
da692b79 2745 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 2746 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2747 'info_dict': {
2748 'title': 'Data Analysis with Dr Mike Pound',
2749 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2750 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2751 'uploader': 'Computerphile',
deaec5af 2752 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2753 },
2754 'playlist_mincount': 11,
2755 }, {
a0566bbf 2756 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2757 'only_matching': True,
dacb3a86 2758 }, {
da692b79 2759 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
2760 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2761 'info_dict': {
2762 'id': 'FqZTN594JQw',
2763 'ext': 'webm',
2764 'title': "Smiley's People 01 detective, Adventure Series, Action",
2765 'uploader': 'STREEM',
2766 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2767 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2768 'upload_date': '20150526',
2769 'license': 'Standard YouTube License',
2770 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2771 'categories': ['People & Blogs'],
2772 'tags': list,
dbdaaa23 2773 'view_count': int,
dacb3a86
S
2774 'like_count': int,
2775 'dislike_count': int,
2776 },
2777 'params': {
2778 'skip_download': True,
2779 },
13a75688 2780 'skip': 'This video is not available.',
dacb3a86 2781 'add_ie': [YoutubeIE.ie_key()],
481cc733 2782 }, {
8bdd16b4 2783 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2784 'only_matching': True,
66b48727 2785 }, {
8bdd16b4 2786 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2787 'only_matching': True,
a0566bbf 2788 }, {
2789 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2790 'info_dict': {
da692b79 2791 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 2792 'ext': 'mp4',
deaec5af 2793 'title': compat_str,
a0566bbf 2794 'uploader': 'Sky News',
2795 'uploader_id': 'skynews',
2796 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 2797 'upload_date': r're:\d{8}',
2798 'description': compat_str,
a0566bbf 2799 'categories': ['News & Politics'],
2800 'tags': list,
2801 'like_count': int,
2802 'dislike_count': int,
2803 },
2804 'params': {
2805 'skip_download': True,
2806 },
da692b79 2807 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 2808 }, {
2809 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2810 'info_dict': {
2811 'id': 'a48o2S1cPoo',
2812 'ext': 'mp4',
2813 'title': 'The Young Turks - Live Main Show',
2814 'uploader': 'The Young Turks',
2815 'uploader_id': 'TheYoungTurks',
2816 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2817 'upload_date': '20150715',
2818 'license': 'Standard YouTube License',
2819 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2820 'categories': ['News & Politics'],
2821 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2822 'like_count': int,
2823 'dislike_count': int,
2824 },
2825 'params': {
2826 'skip_download': True,
2827 },
2828 'only_matching': True,
2829 }, {
2830 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2831 'only_matching': True,
2832 }, {
2833 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2834 'only_matching': True,
3d3dddc9 2835 }, {
2836 'url': 'https://www.youtube.com/feed/trending',
2837 'only_matching': True,
2838 }, {
3d3dddc9 2839 'url': 'https://www.youtube.com/feed/library',
2840 'only_matching': True,
2841 }, {
3d3dddc9 2842 'url': 'https://www.youtube.com/feed/history',
2843 'only_matching': True,
2844 }, {
3d3dddc9 2845 'url': 'https://www.youtube.com/feed/subscriptions',
2846 'only_matching': True,
2847 }, {
3d3dddc9 2848 'url': 'https://www.youtube.com/feed/watch_later',
2849 'only_matching': True,
2850 }, {
da692b79 2851 'note': 'Recommended - redirects to home page',
3d3dddc9 2852 'url': 'https://www.youtube.com/feed/recommended',
2853 'only_matching': True,
29f7c58a 2854 }, {
da692b79 2855 'note': 'inline playlist with not always working continuations',
29f7c58a 2856 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2857 'only_matching': True,
2858 }, {
2859 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2860 'only_matching': True,
2861 }, {
2862 'url': 'https://www.youtube.com/course',
2863 'only_matching': True,
2864 }, {
2865 'url': 'https://www.youtube.com/zsecurity',
2866 'only_matching': True,
2867 }, {
2868 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2869 'only_matching': True,
2870 }, {
2871 'url': 'https://www.youtube.com/TheYoungTurks/live',
2872 'only_matching': True,
39ed931e 2873 }, {
2874 'url': 'https://www.youtube.com/hashtag/cctv9',
2875 'info_dict': {
2876 'id': 'cctv9',
2877 'title': '#cctv9',
2878 },
2879 'playlist_mincount': 350,
201c1459 2880 }, {
2881 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2882 'only_matching': True,
9297939e 2883 }, {
da692b79 2884 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 2885 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2886 'only_matching': True
fe03a6cd 2887 }, {
2888 'note': '/browse/ should redirect to /channel/',
2889 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
2890 'only_matching': True
2891 }, {
2892 'note': 'VLPL, should redirect to playlist?list=PL...',
2893 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2894 'info_dict': {
2895 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2896 'uploader': 'NoCopyrightSounds',
2897 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
2898 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
2899 'title': 'NCS Releases',
2900 },
2901 'playlist_mincount': 166,
18db7548 2902 }, {
2903 'note': 'Topic, should redirect to playlist?list=UU...',
2904 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
2905 'info_dict': {
2906 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
2907 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
2908 'title': 'Uploads from Royalty Free Music - Topic',
2909 'uploader': 'Royalty Free Music - Topic',
2910 },
2911 'expected_warnings': [
2912 'A channel/user page was given',
2913 'The URL does not have a videos tab',
2914 ],
2915 'playlist_mincount': 101,
2916 }, {
2917 'note': 'Topic without a UU playlist',
2918 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
2919 'info_dict': {
2920 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
2921 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
2922 },
2923 'expected_warnings': [
2924 'A channel/user page was given',
2925 'The URL does not have a videos tab',
2926 'Falling back to channel URL',
2927 ],
2928 'playlist_mincount': 9,
abcdd12b 2929 }, {
2930 'note': 'Youtube music Album',
2931 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
2932 'info_dict': {
2933 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
2934 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
2935 },
2936 'playlist_count': 50,
29f7c58a 2937 }]
2938
2939 @classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that ``YoutubeIE`` accepts is rejected here; every other URL
    is judged by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2943
2944 def _extract_channel_id(self, webpage):
2945 channel_id = self._html_search_meta(
2946 'channelId', webpage, 'channel id', default=None)
2947 if channel_id:
2948 return channel_id
2949 channel_url = self._html_search_meta(
2950 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2951 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2952 'twitter:app:url:googleplay'), webpage, 'channel url')
2953 return self._search_regex(
2954 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2955 channel_url, 'channel id')
15f6397c 2956
8bdd16b4 2957 @staticmethod
cd7c66cf 2958 def _extract_basic_item_renderer(item):
2959 # Modified from _extract_grid_item_renderer
201c1459 2960 known_basic_renderers = (
2961 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 2962 )
2963 for key, renderer in item.items():
201c1459 2964 if not isinstance(renderer, dict):
cd7c66cf 2965 continue
201c1459 2966 elif key in known_basic_renderers:
2967 return renderer
2968 elif key.startswith('grid') and key.endswith('Renderer'):
2969 return renderer
8bdd16b4 2970
8bdd16b4 2971 def _grid_entries(self, grid_renderer):
2972 for item in grid_renderer['items']:
2973 if not isinstance(item, dict):
39b62db1 2974 continue
cd7c66cf 2975 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 2976 if not isinstance(renderer, dict):
2977 continue
2978 title = try_get(
201c1459 2979 renderer, (lambda x: x['title']['runs'][0]['text'],
2980 lambda x: x['title']['simpleText']), compat_str)
8bdd16b4 2981 # playlist
2982 playlist_id = renderer.get('playlistId')
2983 if playlist_id:
2984 yield self.url_result(
2985 'https://www.youtube.com/playlist?list=%s' % playlist_id,
2986 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2987 video_title=title)
201c1459 2988 continue
8bdd16b4 2989 # video
2990 video_id = renderer.get('videoId')
2991 if video_id:
2992 yield self._extract_video(renderer)
201c1459 2993 continue
8bdd16b4 2994 # channel
2995 channel_id = renderer.get('channelId')
2996 if channel_id:
2997 title = try_get(
2998 renderer, lambda x: x['title']['simpleText'], compat_str)
2999 yield self.url_result(
3000 'https://www.youtube.com/channel/%s' % channel_id,
3001 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3002 continue
3003 # generic endpoint URL support
3004 ep_url = urljoin('https://www.youtube.com/', try_get(
3005 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3006 compat_str))
3007 if ep_url:
3008 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3009 if ie.suitable(ep_url):
3010 yield self.url_result(
3011 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3012 break
8bdd16b4 3013
3d3dddc9 3014 def _shelf_entries_from_content(self, shelf_renderer):
3015 content = shelf_renderer.get('content')
3016 if not isinstance(content, dict):
8bdd16b4 3017 return
cd7c66cf 3018 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3019 if renderer:
3020 # TODO: add support for nested playlists so each shelf is processed
3021 # as separate playlist
3022 # TODO: this includes only first N items
3023 for entry in self._grid_entries(renderer):
3024 yield entry
3025 renderer = content.get('horizontalListRenderer')
3026 if renderer:
3027 # TODO
3028 pass
8bdd16b4 3029
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield results for a shelf: its own URL first, then its content.

    When the shelf exposes an endpoint URL, a URL result titled with the
    shelf title is yielded. With *skip_channels* set, a shelf whose URL
    contains ``/channels?`` produces nothing at all. Entries found in
    the shelf content are yielded afterwards in every other case.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are skipped by URL substring; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType ==
        # WEB_PAGE_TYPE_CHANNEL will not work.
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may lack a URL, so its content is always extracted as well.
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 3047
8bdd16b4 3048 def _playlist_entries(self, video_list_renderer):
3049 for content in video_list_renderer['contents']:
3050 if not isinstance(content, dict):
3051 continue
3052 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3053 if not isinstance(renderer, dict):
3054 continue
3055 video_id = renderer.get('videoId')
3056 if not video_id:
3057 continue
3058 yield self._extract_video(renderer)
07aeced6 3059
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in a rich item's ``content.videoRenderer``.

    Nothing is yielded when that renderer is missing or has no
    ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3067
8bdd16b4 3068 def _video_entry(self, video_renderer):
3069 video_id = video_renderer.get('videoId')
3070 if video_id:
3071 return self._extract_video(video_renderer)
dacb3a86 3072
def _post_thread_entries(self, post_thread_renderer):
    """Yield everything playable that a community post refers to.

    In order: the attached video (if any), the attached playlist (if
    any), and then each YouTube video linked from the post text, except
    a link pointing at the attached video itself. Nothing is yielded
    when the thread has no ``backstagePostRenderer``.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # Attached video
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # Attached playlist
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'],
        compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # Video links inside the post text
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'],
            compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video was already handled above.
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3108
8bdd16b4 3109 def _post_thread_continuation_entries(self, post_thread_continuation):
3110 contents = post_thread_continuation.get('contents')
3111 if not isinstance(contents, list):
3112 return
3113 for content in contents:
3114 renderer = content.get('backstagePostThreadRenderer')
3115 if not isinstance(renderer, dict):
3116 continue
3117 for entry in self._post_thread_entries(renderer):
3118 yield entry
07aeced6 3119
39ed931e 3120 r''' # unused
3121 def _rich_grid_entries(self, contents):
3122 for content in contents:
3123 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3124 if video_renderer:
3125 entry = self._video_entry(video_renderer)
3126 if entry:
3127 yield entry
3128 '''
3129
29f7c58a 3130 @staticmethod
3131 def _build_continuation_query(continuation, ctp=None):
3132 query = {
3133 'ctoken': continuation,
3134 'continuation': continuation,
3135 }
3136 if ctp:
3137 query['itct'] = ctp
3138 return query
3139
8bdd16b4 3140 @staticmethod
def _extract_next_continuation_data(renderer):
    """Build a continuation query from ``continuations[0].nextContinuationData``.

    Returns ``None`` when *renderer* has no such data or the data holds
    no ``continuation`` token.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = data.get('continuation') if data else None
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, data.get('clickTrackingParams'))
c5e8d7af 3151
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for a renderer, or None.

    'nextContinuationData' is tried first; otherwise the renderer's
    'contents' and 'items' lists are scanned, in that order, for the first
    'continuationItemRenderer' that carries a continuation token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        # A missing/empty endpoint makes this lookup yield None as well
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if not token:
            continue
        return YoutubeTabIE._build_continuation_query(
            token, endpoint.get('clickTrackingParams'))
448830ce 3174
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield all entries of a tab, following its continuation pages.

    @param tab             selected tab renderer; its 'content' dict holds
                           either a 'sectionListRenderer' or a
                           'richGridRenderer'
    @param item_id         id used in the download notes of each page
    @param identity_token  passed through to _generate_api_headers
    @param account_syncid  passed through to _generate_api_headers
    @param ytcfg           used for the API context and headers

    The first page is read from the tab itself; further pages are fetched
    with _extract_response until no continuation is found or a response
    contains no known renderer.
    """

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list shared with extract_entries, which stores the
    # continuation it finds in slot 0
    continuation_list = [None]  # Python 2 doesnot support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # 'itct' is only present when the continuation carried click
        # tracking params, so it must not be indexed unconditionally
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response has none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # Response layout 1: 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Response layout 2: 'appendContinuationItemsAction'. The handler is
        # chosen from the first item; all items are wrapped in a dict under
        # the key that handler reads.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3294
@staticmethod
def _extract_selected_tab(tabs):
    """Return the first tab renderer whose 'selected' flag is True.

    Both 'tabRenderer' and 'expandableTabRenderer' are considered.
    Raises ExtractorError when no tab is selected.
    """
    for tab in tabs:
        candidate = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if candidate.get('selected') is True:
            return candidate
    raise ExtractorError('Unable to find selected tab')
b82f815f 3303
@staticmethod
def _extract_uploader(data):
    """Read uploader name, id and URL from the playlist sidebar.

    Returns a dict with any of 'uploader', 'uploader_id' and 'uploader_url';
    keys whose value would be None are left out. When several sidebar items
    name an owner, the last one wins.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or []:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        info['uploader'] = owner.get('text')
        info['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        info['uploader_url'] = urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
    return dict((key, value) for key, value in info.items() if value is not None)
8bdd16b4 3326
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tab-based page.

    Metadata comes from 'channelMetadataRenderer' or, failing that,
    'playlistMetadataRenderer'. The uploader is read from the sidebar when
    no channel id was found. The entries come from the selected tab.
    """
    title = description = playlist_id = None
    channel_name = channel_url = channel_id = None
    tags, raw_thumbnails = [], []

    selected_tab = self._extract_selected_tab(tabs)
    meta_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta_renderer:
        channel_name = meta_renderer.get('title')
        channel_url = meta_renderer.get('channelUrl')
        channel_id = meta_renderer.get('externalId')
    else:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        # Stays None for playlists; item_id is substituted further down
        playlist_id = channel_id
        tags = meta_renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(meta_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for raw_thumbnail in raw_thumbnails:
        if not isinstance(raw_thumbnail, dict):
            continue
        thumbnail_url = url_or_none(raw_thumbnail.get('url'))
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(raw_thumbnail.get('width')),
                'height': int_or_none(raw_thumbnail.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly updated) uploader_* fields
    for suffix in ('', '_id', '_url'):
        metadata['channel' + suffix] = metadata['uploader' + suffix]

    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(data)
    ytcfg = self._extract_ytcfg(item_id, webpage)
    return self.playlist_result(
        self._entries(selected_tab, playlist_id, identity_token, account_syncid, ytcfg),
        **metadata)
73c4ac2c 3399
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, page by page.

    @param playlist     playlist renderer of the first page
    @param playlist_id  id sent as 'playlistId' in every API query
    @param data         used to extract the account sync id
    @param webpage      used to extract ytcfg and the identity token

    Each page is requested from the 'next' endpoint starting at the last
    video already seen. Iteration stops when a page is empty, when it adds
    nothing after the last seen video, or when the first video shows up
    again.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The endpoint may be absent; fall back to an empty dict so the
        # defaults in the query below apply instead of raising on None
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 3438
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a playlist that is shown next to a video (watch page).

    If the playlist advertises a URL different from the one requested, the
    work is handed to YoutubeTabIE through a url_result. Otherwise it is
    extracted here as a mix playlist.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3456
@staticmethod
def _extract_alerts(data):
    """Yield (alert_type, message) for every alert found in data['alerts'].

    The message is the alert's 'simpleText' when present, otherwise the
    concatenation of the text of its 'runs'. Alerts that are not dicts, or
    that have no type or no message, are skipped. Each alert is yielded at
    most once.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if not message:
                runs = try_get(alert, lambda x: x['text']['runs'], list) or []
                # A run without usable text contributes nothing instead of
                # breaking the concatenation
                message = ''.join(
                    try_get(run, lambda x: x['text'], compat_str) or ''
                    for run in runs)
            if message:
                yield alert_type, message
3473
3474 def _report_alerts(self, alerts, expected=True):
3ffc7c89 3475 errors = []
3476 warnings = []
95c01b6c 3477 for alert_type, alert_message in alerts:
f3eaa8dd 3478 if alert_type.lower() == 'error':
3ffc7c89 3479 errors.append([alert_type, alert_message])
f3eaa8dd 3480 else:
3ffc7c89 3481 warnings.append([alert_type, alert_message])
f3eaa8dd 3482
3ffc7c89 3483 for alert_type, alert_message in (warnings + errors[:-1]):
6a39ee13 3484 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
3ffc7c89 3485 if errors:
3486 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3487
95c01b6c 3488 def _extract_and_report_alerts(self, data, *args, **kwargs):
3489 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
3490
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when the page has no playlist sidebar. Otherwise the API is
    queried (non-fatally) with the button's browse id/params, or with
    default values when no such button was found.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return

    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        menu_items = try_get(
            sidebar_item.get('playlistSidebarPrimaryInfoRenderer'),
            lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if not isinstance(menu_item, dict):
                continue
            nav_item = menu_item.get('menuNavigationItemRenderer')
            label = try_get(
                nav_item, lambda x: x['text']['simpleText'], compat_str)
            if not label or label.lower() != 'show unavailable videos':
                continue
            endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id, params = endpoint.get('browseId'), endpoint.get('params')
            # Only ends the scan of this menu; later sidebar items are
            # still inspected
            break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    return self._extract_response(
        item_id=item_id, headers=headers,
        query={
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        },
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3534
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API, retrying on flaky HTTP errors and incomplete data.

    @param check_get_keys  key or keys of which at least one must be present
                           in the response for it to count as complete
    @param fatal           when False, failures are reported as warnings and
                           None is returned instead of raising
    The number of retries comes from the 'extractor_retries' param
    (default 3). HTTP 500, 503 and 404 errors are retried.
    """
    retries = self.get_param('extractor_retries', 3)
    required_keys = [] if check_get_keys is None else check_get_keys
    response = last_error = None
    attempt = -1
    while attempt < retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, retry_suffix))
        except ExtractorError as e:
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                last_error = 'HTTP Error %s' % e.cause.code
                if attempt < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        # Youtube may send alerts if there was an issue with the continuation page
        self._extract_and_report_alerts(response, expected=False)
        if not required_keys or dict_get(response, required_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if attempt >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            return
    return response
3582
def _extract_webpage(self, url, item_id):
    """Download a page until its initial data looks complete.

    Returns (webpage, data). The data counts as complete when it has
    'contents' or 'currentVideoEndpoint'. Raises ExtractorError once the
    'extractor_retries' budget (default 3) is used up.
    """
    last_error = 'Incomplete yt initial data recieved'
    retries = self.get_param('extractor_retries', 3)
    attempt = -1
    while attempt < retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if attempt:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        webpage = self._download_webpage(
            url, item_id, 'Downloading webpage%s' % retry_suffix)
        data = self._extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if attempt >= retries:
            raise ExtractorError(last_error)
    return webpage, data
3604
9297939e 3605 @staticmethod
3606 def _smuggle_data(entries, data):
3607 for entry in entries:
3608 if data:
3609 entry['url'] = smuggle_url(entry['url'], data)
3610 yield entry
3611
def _real_extract(self, url):
    """Extract the URL and pass the smuggled data on to every entry.

    The 'is_music_url' flag is added to the smuggled data for music URLs
    before the actual extraction runs.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
3620
# Splits a URL into the part matched by _VALID_URL ('pre'), an optional tab
# path (only tried when the 'channel_type' group matched) and the rest ('post')
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)

def __real_extract(self, url, smuggled_data):
    """Resolve a tab/playlist/watch URL into an info dict.

    @param url            URL to extract (any smuggled data already removed)
    @param smuggled_data  dict; its 'is_music_url' flag enables the
                          Youtube music specific rewrites

    The URL is normalized first (host forced to www.youtube.com, tab name
    lower-cased, channel home redirected to /videos unless disabled by a
    compat option). The downloaded page is then handled as a tab page, a
    watch-page playlist or a single video, in that order.
    Raises ExtractorError when none of these can be recognized.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Unmatched groups become '' so callers can use str methods on them
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if (mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]
                and 'no-youtube-channel-redirect' not in compat_opts):
            if not mobj['not_channel'] and item_id[:2] == 'UC':
                # Topic channels don't have /videos. Use the equivalent playlist instead
                self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                pl_id = 'UU%s' % item_id[2:]
                pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                try:
                    pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                    for alert_type, alert_message in self._extract_alerts(pl_data):
                        # Compare case-insensitively, the same way
                        # _report_alerts classifies error alerts
                        if alert_type.lower() == 'error':
                            raise ExtractorError('Youtube said: %s' % alert_message)
                    item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                except ExtractorError:
                    self.report_warning('The playlist gave error. Falling back to channel URL')
            else:
                self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    # data may have been replaced above, so the tabs are looked up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3734
c5e8d7af 3735
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids and playlist URLs that YoutubeTabIE does not
    # claim, and forwards them to YoutubeTabIE as a canonical
    # https://www.youtube.com/playlist URL.
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by YoutubeTabIE and URLs that carry a video id."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to YoutubeTabIE, keeping the original query string."""
        playlist_id = self._match_id(url)
        # This class derives from plain InfoExtractor, so the helper is
        # reached through the class that provides the other Youtube helpers
        # NOTE(review): assumes is_music_url is a static/class method of
        # YoutubeBaseInfoExtractor - confirm against its definition
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 3818
3819
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a playlist id
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link as a watch URL and hand it to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3858
3859
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" shorthand for a user's page
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's page URL."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3873
b05654f0 3874
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" style shortcuts for the liked-videos playlist ("LL")
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the liked-videos playlist URL."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
3892
3893
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _SEARCH_KEY = 'ytsearch'
    # Optional value sent as the "params" field of the search request;
    # subclasses override it to select a different result ordering/filter.
    _SEARCH_PARAMS = None
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` extracted videos for ``query``, page by page.

        Pages are requested from the ``search`` endpoint until ``n`` videos
        have been yielded, a response is empty or has no result list, or a
        page carries no continuation token.
        """
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS

        yielded = 0
        for page_num in itertools.count(1):
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands'))
            if not response:
                return

            # The first page nests the sections under "contents"; follow-up
            # pages deliver them as continuation items.
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    # Skip anything that is not a video entry carrying an id.
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            request['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3963
c9ae7b95 3964
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as YoutubeSearchIE, but with a fixed _SEARCH_PARAMS value
    # ('CAI%3D' is the percent-encoded form of 'CAI=').
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3970
c9ae7b95 3971
class YoutubeSearchURLIE(YoutubeSearchIE):
    # Handles full "youtube.com/results?..." URLs instead of a search keyword.
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Return the class's own _VALID_URL pattern unchanged.
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by the query string of ``url``.

        The search terms come from ``search_query`` (or ``q``) and the
        optional ``sp`` parameter is stored in ``_SEARCH_PARAMS`` before the
        results are collected.
        """
        url_params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = url_params.get('search_query') or url_params.get('q')
        query = terms[0]
        # A missing "sp" parameter yields the empty string.
        self._SEARCH_PARAMS = url_params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3997
3998
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    Every subclass has to provide a _FEED_NAME attribute; it is used both for
    the extractor name and for the feed URL.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        # Log in before any feed is requested.
        self._login()

    @property
    def IE_NAME(self):
        # Derived from the subclass's feed name, e.g. 'youtube:<feed name>'.
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Return a url_result for this feed's page, handled by YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4018
4019
class YoutubeWatchLaterIE(InfoExtractor):
    # Shortcut extractor: ":ytwatchlater" is forwarded to the "WL" playlist
    # via YoutubeTabIE.
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the ``WL`` playlist; ``url`` itself is not inspected."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4032
4033
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the 'recommended' feed; matches the bare
    # youtube.com front page as well as the ":ytrec" shortcut.
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4048
1ed5b5c9 4049
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the 'subscriptions' feed, reached through the
    # ":ytsubs" shortcut and the variants accepted by _VALID_URL.
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4061
1ed5b5c9 4062
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the 'history' feed, reached through ":ythis" or
    # ":ythistory".
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4071
4072
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that carry no video id (typically
    # because an unquoted "&" made the shell cut the URL short) and turns
    # them into a helpful error instead of an obscure failure.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with a hint on how to quote the URL.

        Raises:
            ExtractorError: unconditionally, flagged as expected so that it
                is reported as a user error rather than a bug.
        """
        # The hint names this program (yt-dlp), not the project it was
        # forked from, so the suggested command can be run as written.
        # The space before the final period keeps the example id clean.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4120
4121
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose "v" value is only 1-10 characters long and
    # reports them as truncated.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail, naming the incomplete id and the offending URL.

        Raises:
            ExtractorError: unconditionally, flagged as expected.
        """
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)