]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[tenplay] Fix extractor (#314)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Return the query-string parameters of *url*, as parsed by compat_urlparse.parse_qs."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
de7f3446 60class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
61 """Provide base functions for Youtube extractors"""
# Page downloaded first by _login(); its hidden form inputs seed every later login request
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
# NOTE(review): not referenced by the login code visible in this chunk - confirm it is still used
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

# Endpoint _login() posts the username to in order to obtain the user hash
_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
# Endpoint _login() posts the user hash and password to
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
# Endpoint for submitting the 2-step verification code; {0} is filled with the TL value
# extracted from the password challenge response
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# Regex alternation of path names
# NOTE(review): presumably names that must not be mistaken for channel/user handles - confirm against users
_RESERVED_NAMES = (
    r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|'
    r'movies|results|shared|hashtag|trending|feed|feeds|oembed|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

# Regex matching playlist IDs: a known prefix followed by at least 10 ID characters,
# or one of the bare special IDs (RDMM, WL, LL, LM)
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 79
def _login(self):
    """
    Attempt to log in to YouTube.

    True is returned if successful or skipped.
    False is returned if login failed (every failure path returns False, never None).

    If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
    """
    username, password = self._get_login_info()
    # No authentication to be performed
    if username is None:
        if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
        return True

    login_page = self._download_webpage(
        self._LOGIN_URL, None,
        note='Downloading login page',
        errnote='unable to fetch login page', fatal=False)
    if login_page is False:
        return False

    login_form = self._hidden_inputs(login_page)

    # The same "continue" URL is embedded in both the lookup and the challenge request
    continue_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

    def req(url, f_req, note, errnote):
        """POST the login form plus the JSON-encoded f_req to url; returns parsed JSON or False."""
        data = login_form.copy()
        data.update({
            'pstMsg': 1,
            'checkConnection': 'youtube',
            'checkedDomains': 'youtube',
            'hl': 'en',
            'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
            'f.req': json.dumps(f_req),
            'flowName': 'GlifWebSignIn',
            'flowEntry': 'ServiceLogin',
            # TODO: reverse actual botguard identifier generation algo
            'bgRequest': '["identifier",""]',
        })
        return self._download_json(
            url, None, note=note, errnote=errnote,
            # Drop everything in front of the first '[' so the remainder parses as JSON
            transform_source=lambda s: re.sub(r'^[^[]*', '', s),
            fatal=False,
            data=urlencode_postdata(data), headers={
                'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                'Google-Accounts-XSRF': 1,
            })

    def warn(message):
        self.report_warning(message)

    lookup_req = [
        username,
        None, [], None, 'US', None, None, 2, False, True,
        [
            None, None,
            [2, 1, None, 1,
             continue_url,
             None, [], 4],
            1, [None, None, []], None, None, None, True
        ],
        username,
    ]

    lookup_results = req(
        self._LOOKUP_URL, lookup_req,
        'Looking up account info', 'Unable to look up account info')

    if lookup_results is False:
        return False

    user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
    if not user_hash:
        warn('Unable to extract user hash')
        return False

    challenge_req = [
        user_hash,
        None, 1, None, [1, None, None, None, [password, None, True]],
        [
            None, None, [2, 1, None, 1, continue_url, None, [], 4],
            1, [None, None, []], None, None, None, True
        ]]

    challenge_results = req(
        self._CHALLENGE_URL, challenge_req,
        'Logging in', 'Unable to log in')

    if challenge_results is False:
        return False

    login_res = try_get(challenge_results, lambda x: x[0][5], list)
    if login_res:
        login_msg = try_get(login_res, lambda x: x[5], compat_str)
        # Parenthesised: '%' binds tighter than the conditional expression, so without
        # the parentheses any other message would be reported without the prefix
        warn('Unable to login: %s' % (
            'Invalid password' if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
        return False

    res = try_get(challenge_results, lambda x: x[0][-1], list)
    if not res:
        warn('Unable to extract result entry')
        return False

    login_challenge = try_get(res, lambda x: x[0][0], list)
    if login_challenge:
        challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
        if challenge_str == 'TWO_STEP_VERIFICATION':
            # SEND_SUCCESS - TFA code has been successfully sent to phone
            # QUOTA_EXCEEDED - reached the limit of TFA codes
            status = try_get(login_challenge, lambda x: x[5], compat_str)
            if status == 'QUOTA_EXCEEDED':
                warn('Exceeded the limit of TFA codes, try later')
                return False

            tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
            if not tl:
                warn('Unable to extract TL')
                return False

            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                warn(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            tfa_code = remove_start(tfa_code, 'G-')

            tfa_req = [
                user_hash, None, 2, None,
                [
                    9, None, None, None, None, None, None, None,
                    [None, tfa_code, True, 2]
                ]]

            tfa_results = req(
                self._TFA_URL.format(tl), tfa_req,
                'Submitting TFA code', 'Unable to submit TFA code')

            if tfa_results is False:
                return False

            tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
            if tfa_res:
                tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                # Same precedence fix as for the password challenge above
                warn('Unable to finish TFA: %s' % (
                    'Invalid TFA code' if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                return False

            check_cookie_url = try_get(
                tfa_results, lambda x: x[0][-1][2], compat_str)
        else:
            CHALLENGES = {
                'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
            }
            challenge = CHALLENGES.get(
                challenge_str,
                '%s returned error %s.' % (self.IE_NAME, challenge_str))
            warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
            return False
    else:
        check_cookie_url = try_get(res, lambda x: x[2], compat_str)

    if not check_cookie_url:
        warn('Unable to extract CheckCookie URL')
        return False

    check_cookie_results = self._download_webpage(
        check_cookie_url, None, 'Checking cookie', fatal=False)

    if check_cookie_results is False:
        return False

    # The login only counts as successful if the CheckCookie page references the account page
    if 'https://myaccount.google.com/' not in check_cookie_results:
        warn('Unable to log in')
        return False

    return True
264
cce889b9 265 def _initialize_consent(self):
266 cookies = self._get_cookies('https://www.youtube.com/')
267 if cookies.get('__Secure-3PSID'):
268 return
269 consent_id = None
270 consent = cookies.get('CONSENT')
271 if consent:
272 if 'YES' in consent.value:
273 return
274 consent_id = self._search_regex(
275 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
276 if not consent_id:
277 consent_id = random.randint(100, 999)
278 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 279
b2e8bc1b 280 def _real_initialize(self):
cce889b9 281 self._initialize_consent()
b2e8bc1b
JMF
282 if self._downloader is None:
283 return
b2e8bc1b
JMF
284 if not self._login():
285 return
c5e8d7af 286
# Fallback client version, used when ytcfg provides no INNERTUBE_CLIENT_VERSION
_YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
# Fallback API key, used when ytcfg provides no INNERTUBE_API_KEY
_YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
# Matches the ytInitialData assignment (plain or window["ytInitialData"]) and captures the JSON object
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
# Matches the ytInitialPlayerResponse assignment and captures the JSON object
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text expected right after the JSON object; appended to the regexes above to anchor
# the non-greedy capture before falling back to the unanchored pattern
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 292
a5c56234
M
293 def _generate_sapisidhash_header(self):
294 sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
295 if sapisid_cookie is None:
296 return
297 time_now = round(time.time())
298 sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
299 return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)
300
301 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 302 note='Downloading API JSON', errnote='Unable to download API page',
303 context=None, api_key=None):
304
305 data = {'context': context} if context else {'context': self._extract_context()}
8bdd16b4 306 data.update(query)
f4f751af 307 real_headers = self._generate_api_headers()
308 real_headers.update({'content-type': 'application/json'})
309 if headers:
310 real_headers.update(headers)
545cc85d 311 return self._download_json(
a5c56234
M
312 'https://www.youtube.com/youtubei/v1/%s' % ep,
313 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 314 data=json.dumps(data).encode('utf8'), headers=real_headers,
315 query={'key': api_key or self._extract_api_key()})
316
def _extract_api_key(self, ytcfg=None):
    """Return the INNERTUBE_API_KEY string from *ytcfg*, or the built-in default key if absent."""
    configured_key = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_API_KEY'], compat_str)
    if configured_key:
        return configured_key
    return self._YT_INNERTUBE_API_KEY
c54f4aad 319
8bdd16b4 320 def _extract_yt_initial_data(self, video_id, webpage):
321 return self._parse_json(
322 self._search_regex(
29f7c58a 323 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 324 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 325 video_id)
0c148415 326
a1c5d2ca
M
def _extract_identity_token(self, webpage, item_id):
    """
    Return the identity token for *webpage*, or None if it cannot be found.

    The ID_TOKEN entry of the page's ytcfg is preferred; otherwise the raw
    page text is searched for an ID_TOKEN key/value pair.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda cfg: cfg['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
336
@staticmethod
def _extract_account_syncid(data):
    """
    Extract syncId required to download private playlists of secondary channels
    @param data Either response or ytcfg (None or a non-dict yields None)
    """
    datasync_id = try_get(
        data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
               lambda x: x['DATASYNC_ID']), compat_str) or ''
    sync_ids = datasync_id.split('||')
    if len(sync_ids) >= 2 and sync_ids[1]:
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return sync_ids[0]
    # ytcfg includes channel_syncid if on secondary channel.
    # Guarded so that a missing or non-dict data does not raise AttributeError,
    # matching the tolerance of the try_get lookup above
    if isinstance(data, dict):
        return data.get('DELEGATED_SESSION_ID')
    return None
a1c5d2ca 352
29f7c58a 353 def _extract_ytcfg(self, video_id, webpage):
8c54a305 354 if not webpage:
355 return {}
29f7c58a 356 return self._parse_json(
357 self._search_regex(
358 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 359 default='{}'), video_id, fatal=False) or {}
360
def __extract_client_version(self, ytcfg):
    """Return INNERTUBE_CLIENT_VERSION from *ytcfg*, or the built-in web client version if absent."""
    version = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'], compat_str)
    return version if version else self._YT_WEB_CLIENT_VERSION
363
def _extract_context(self, ytcfg=None):
    """
    Return the innertube request context.

    The INNERTUBE_CONTEXT dict of *ytcfg* is returned as-is when present and
    non-empty. Otherwise a minimal context is rebuilt from the client name
    (default 'WEB'), the client version and, if available, VISITOR_DATA.
    """
    existing = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_CONTEXT'], dict)
    if existing:
        return existing

    # Recreate the client context (required)
    version = self.__extract_client_version(ytcfg)
    name = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
    client = {
        'clientName': name,
        'clientVersion': version,
    }
    visitor = try_get(ytcfg, lambda cfg: cfg['VISITOR_DATA'], compat_str)
    if visitor:
        client['visitorData'] = visitor
    return {'client': client}
382
383 def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
384 headers = {
385 'X-YouTube-Client-Name': '1',
386 'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
387 }
388 if identity_token:
389 headers['x-youtube-identity-token'] = identity_token
390 if account_syncid:
391 headers['X-Goog-PageId'] = account_syncid
392 headers['X-Goog-AuthUser'] = 0
393 if visitor_data:
394 headers['x-goog-visitor-id'] = visitor_data
395 auth = self._generate_sapisidhash_header()
396 if auth is not None:
397 headers['Authorization'] = auth
398 headers['X-Origin'] = 'https://www.youtube.com'
399 return headers
29f7c58a 400
9297939e 401 @staticmethod
402 def is_music_url(url):
403 return re.match(r'https?://music\.youtube\.com/', url) is not None
404
def _extract_video(self, renderer):
    """
    Convert a video renderer dict into a 'url' result handled by YoutubeIE.

    Title, description, duration, view count and uploader are read from the
    renderer where present; missing fields come out as None.
    """
    def first_text(*getters):
        # First getter that yields a string from the renderer wins
        return try_get(renderer, getters, compat_str)

    video_id = renderer.get('videoId')
    title = first_text(
        lambda r: r['title']['runs'][0]['text'],
        lambda r: r['title']['simpleText'])
    description = first_text(lambda r: r['descriptionSnippet']['runs'][0]['text'])
    duration = parse_duration(first_text(lambda r: r['lengthText']['simpleText']))

    # Strip all whitespace, then keep only the leading digits-and-commas run
    view_count_text = first_text(lambda r: r['viewCountText']['simpleText']) or ''
    compact_views = re.sub(r'\s', '', view_count_text)
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', compact_views, 'view count', default=None))

    uploader = first_text(
        lambda r: r['ownerText']['runs'][0]['text'],
        lambda r: r['shortBylineText']['runs'][0]['text'])

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
436
0c148415 437
360e1ca5 438class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 439 IE_DESC = 'YouTube.com'
bc2ca1bb 440 _INVIDIOUS_SITES = (
441 # invidious-redirect websites
442 r'(?:www\.)?redirect\.invidious\.io',
443 r'(?:(?:www|dev)\.)?invidio\.us',
444 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
445 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 446 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 447 r'(?:(?:www|au)\.)?ytprivate\.com',
448 r'(?:www\.)?invidious\.namazso\.eu',
449 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 450 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
451 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
452 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
453 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
454 # youtube-dl invidious instances list
455 r'(?:(?:www|no)\.)?invidiou\.sh',
456 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
457 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 458 r'(?:www\.)?invidious\.mastodon\.host',
459 r'(?:www\.)?invidious\.zapashcanon\.fr',
460 r'(?:www\.)?invidious\.kavin\.rocks',
201c1459 461 r'(?:www\.)?invidious\.tinfoil-hat\.net',
462 r'(?:www\.)?invidious\.himiko\.cloud',
463 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 464 r'(?:www\.)?invidious\.tube',
465 r'(?:www\.)?invidiou\.site',
466 r'(?:www\.)?invidious\.site',
467 r'(?:www\.)?invidious\.xyz',
468 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 469 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 470 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 471 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 472 r'(?:www\.)?tube\.poal\.co',
473 r'(?:www\.)?tube\.connect\.cafe',
474 r'(?:www\.)?vid\.wxzm\.sx',
475 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 476 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 477 r'(?:www\.)?yewtu\.be',
478 r'(?:www\.)?yt\.elukerio\.org',
479 r'(?:www\.)?yt\.lelux\.fi',
480 r'(?:www\.)?invidious\.ggc-project\.de',
481 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 482 r'(?:www\.)?ytprivate\.com',
483 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 484 r'(?:www\.)?invidious\.toot\.koeln',
485 r'(?:www\.)?invidious\.fdn\.fr',
486 r'(?:www\.)?watch\.nettohikari\.com',
487 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
488 r'(?:www\.)?qklhadlycap4cnod\.onion',
489 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
490 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
491 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
492 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
493 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
494 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
495 )
cb7dfeea 496 _VALID_URL = r"""(?x)^
c5e8d7af 497 (
edb53e2d 498 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 499 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
500 (?:www\.)?deturl\.com/www\.youtube\.com|
501 (?:www\.)?pwnyoutube\.com|
502 (?:www\.)?hooktube\.com|
503 (?:www\.)?yourepeat\.com|
504 tube\.majestyc\.net|
505 %(invidious)s|
506 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
507 (?:.*?\#/)? # handle anchor (#/) redirect urls
508 (?: # the various things that can precede the ID:
ac7553d0 509 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 510 |(?: # or the v= param in all its forms
f7000f3a 511 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 512 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 513 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
514 v=
515 )
f4b05232 516 ))
cbaed4bb
S
517 |(?:
518 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
519 vid\.plus| # or vid.plus/xxxx
520 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 521 %(invidious)s
cbaed4bb 522 )/
edb53e2d 523 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 524 )
c5e8d7af 525 )? # all until now is optional -> you can pass the naked ID
201c1459 526 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 527 (?(1).+)? # if we found the ID, everything can follow
9297939e 528 (?:\#|$)""" % {
bc2ca1bb 529 'invidious': '|'.join(_INVIDIOUS_SITES),
530 }
e40c758c 531 _PLAYER_INFO_RE = (
cc2db878 532 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
533 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 534 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 535 )
2c62dc26 536 _formats = {
c2d3cb4c 537 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
538 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
539 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
540 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
541 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
542 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
543 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
544 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 545 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 546 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
547 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
548 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
549 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
550 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
551 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 552 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 553 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
554 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 555
556
557 # 3D videos
c2d3cb4c 558 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
559 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
560 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
561 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 562 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
563 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
564 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 565
96fb5605 566 # Apple HTTP Live Streaming
11f12195 567 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 568 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
569 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
570 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
571 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
572 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 573 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
574 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
575
576 # DASH mp4 video
d23028a8
S
577 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
578 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
579 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
580 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
581 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 582 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
583 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
584 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
585 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
586 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
587 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
588 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 589
f6f1fc92 590 # Dash mp4 audio
d23028a8
S
591 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
592 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
593 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
594 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
595 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
596 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
597 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
598
599 # Dash webm
d23028a8
S
600 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
601 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
602 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
603 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
604 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
605 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
606 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
607 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
608 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
609 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
610 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
611 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
612 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
613 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
614 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 615 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
616 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
617 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
618 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
619 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
620 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
621 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
622
623 # Dash webm audio
d23028a8
S
624 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
625 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 626
0857baad 627 # Dash webm audio with opus inside
d23028a8
S
628 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
629 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
630 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 631
ce6b9a2d
PH
632 # RTMP (unnamed)
633 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
634
635 # av01 video only formats sometimes served with "unknown" codecs
636 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
637 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
638 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
639 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 640 }
29f7c58a 641 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 642
fd5c4aab
S
643 _GEO_BYPASS = False
644
78caa52a 645 IE_NAME = 'youtube'
2eb88d95
PH
646 _TESTS = [
647 {
2d3d2997 648 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
649 'info_dict': {
650 'id': 'BaW_jenozKc',
651 'ext': 'mp4',
3867038a 652 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
653 'uploader': 'Philipp Hagemeister',
654 'uploader_id': 'phihag',
ec85ded8 655 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
656 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
657 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 658 'upload_date': '20121002',
3867038a 659 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 660 'categories': ['Science & Technology'],
3867038a 661 'tags': ['youtube-dl'],
556dbe7f 662 'duration': 10,
dbdaaa23 663 'view_count': int,
3e7c1224
PH
664 'like_count': int,
665 'dislike_count': int,
7c80519c 666 'start_time': 1,
297a564b 667 'end_time': 9,
2eb88d95 668 }
0e853ca4 669 },
fccd3771 670 {
4bc3a23e
PH
671 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
672 'note': 'Embed-only video (#1746)',
673 'info_dict': {
674 'id': 'yZIXLfi8CZQ',
675 'ext': 'mp4',
676 'upload_date': '20120608',
677 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
678 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
679 'uploader': 'SET India',
94bfcd23 680 'uploader_id': 'setindia',
ec85ded8 681 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 682 'age_limit': 18,
545cc85d 683 },
684 'skip': 'Private video',
fccd3771 685 },
11b56058 686 {
8bdd16b4 687 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
688 'note': 'Use the first video ID in the URL',
689 'info_dict': {
690 'id': 'BaW_jenozKc',
691 'ext': 'mp4',
3867038a 692 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
693 'uploader': 'Philipp Hagemeister',
694 'uploader_id': 'phihag',
ec85ded8 695 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 696 'upload_date': '20121002',
3867038a 697 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 698 'categories': ['Science & Technology'],
3867038a 699 'tags': ['youtube-dl'],
556dbe7f 700 'duration': 10,
dbdaaa23 701 'view_count': int,
11b56058
PM
702 'like_count': int,
703 'dislike_count': int,
34a7de29
S
704 },
705 'params': {
706 'skip_download': True,
707 },
11b56058 708 },
dd27fd17 709 {
2d3d2997 710 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
711 'note': '256k DASH audio (format 141) via DASH manifest',
712 'info_dict': {
713 'id': 'a9LDPn-MO4I',
714 'ext': 'm4a',
715 'upload_date': '20121002',
716 'uploader_id': '8KVIDEO',
ec85ded8 717 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
718 'description': '',
719 'uploader': '8KVIDEO',
720 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 721 },
4bc3a23e
PH
722 'params': {
723 'youtube_include_dash_manifest': True,
724 'format': '141',
4919603f 725 },
de3c7fe0 726 'skip': 'format 141 not served anymore',
dd27fd17 727 },
8bdd16b4 728 # DASH manifest with encrypted signature
729 {
730 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
731 'info_dict': {
732 'id': 'IB3lcPjvWLA',
733 'ext': 'm4a',
734 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
735 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
736 'duration': 244,
737 'uploader': 'AfrojackVEVO',
738 'uploader_id': 'AfrojackVEVO',
739 'upload_date': '20131011',
cc2db878 740 'abr': 129.495,
8bdd16b4 741 },
742 'params': {
743 'youtube_include_dash_manifest': True,
744 'format': '141/bestaudio[ext=m4a]',
745 },
746 },
aa79ac0c
PH
747 # Controversy video
748 {
749 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
750 'info_dict': {
751 'id': 'T4XJQO3qol8',
752 'ext': 'mp4',
556dbe7f 753 'duration': 219,
aa79ac0c 754 'upload_date': '20100909',
4fe54c12 755 'uploader': 'Amazing Atheist',
aa79ac0c 756 'uploader_id': 'TheAmazingAtheist',
ec85ded8 757 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 758 'title': 'Burning Everyone\'s Koran',
545cc85d 759 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 760 }
c522adb1 761 },
dd2d55f1 762 # Normal age-gate video (embed allowed)
c522adb1 763 {
2d3d2997 764 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
765 'info_dict': {
766 'id': 'HtVdAasjOgU',
767 'ext': 'mp4',
768 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 769 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 770 'duration': 142,
c522adb1
JMF
771 'uploader': 'The Witcher',
772 'uploader_id': 'WitcherGame',
ec85ded8 773 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 774 'upload_date': '20140605',
34952f09 775 'age_limit': 18,
c522adb1
JMF
776 },
777 },
8bdd16b4 778 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
779 # YouTube Red ad is not captured for creator
780 {
781 'url': '__2ABJjxzNo',
782 'info_dict': {
783 'id': '__2ABJjxzNo',
784 'ext': 'mp4',
785 'duration': 266,
786 'upload_date': '20100430',
787 'uploader_id': 'deadmau5',
788 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 789 'creator': 'deadmau5',
790 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 791 'uploader': 'deadmau5',
792 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 793 'alt_title': 'Some Chords',
8bdd16b4 794 },
795 'expected_warnings': [
796 'DASH manifest missing',
797 ]
798 },
067aa17e 799 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
800 {
801 'url': 'lqQg6PlCWgI',
802 'info_dict': {
803 'id': 'lqQg6PlCWgI',
804 'ext': 'mp4',
556dbe7f 805 'duration': 6085,
90227264 806 'upload_date': '20150827',
cbe2bd91 807 'uploader_id': 'olympic',
ec85ded8 808 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 809 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 810 'uploader': 'Olympic',
cbe2bd91
PH
811 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
812 },
813 'params': {
814 'skip_download': 'requires avconv',
e52a40ab 815 }
cbe2bd91 816 },
6271f1ca
PH
817 # Non-square pixels
818 {
819 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
820 'info_dict': {
821 'id': '_b-2C3KPAM0',
822 'ext': 'mp4',
823 'stretched_ratio': 16 / 9.,
556dbe7f 824 'duration': 85,
6271f1ca
PH
825 'upload_date': '20110310',
826 'uploader_id': 'AllenMeow',
ec85ded8 827 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 828 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 829 'uploader': '孫ᄋᄅ',
6271f1ca
PH
830 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
831 },
06b491eb
S
832 },
833 # url_encoded_fmt_stream_map is empty string
834 {
835 'url': 'qEJwOuvDf7I',
836 'info_dict': {
837 'id': 'qEJwOuvDf7I',
f57b7835 838 'ext': 'webm',
06b491eb
S
839 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
840 'description': '',
841 'upload_date': '20150404',
842 'uploader_id': 'spbelect',
843 'uploader': 'Наблюдатели Петербурга',
844 },
845 'params': {
846 'skip_download': 'requires avconv',
e323cf3f
S
847 },
848 'skip': 'This live event has ended.',
06b491eb 849 },
067aa17e 850 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
851 {
852 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
853 'info_dict': {
854 'id': 'FIl7x6_3R5Y',
eb6793ba 855 'ext': 'webm',
da77d856
S
856 'title': 'md5:7b81415841e02ecd4313668cde88737a',
857 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 858 'duration': 220,
da77d856
S
859 'upload_date': '20150625',
860 'uploader_id': 'dorappi2000',
ec85ded8 861 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 862 'uploader': 'dorappi2000',
eb6793ba 863 'formats': 'mincount:31',
da77d856 864 },
eb6793ba 865 'skip': 'not actual anymore',
2ee8f5d8 866 },
8a1a26ce
YCH
867 # DASH manifest with segment_list
868 {
869 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
870 'md5': '8ce563a1d667b599d21064e982ab9e31',
871 'info_dict': {
872 'id': 'CsmdDsKjzN8',
873 'ext': 'mp4',
17ee98e1 874 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
875 'uploader': 'Airtek',
876 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
877 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
878 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
879 },
880 'params': {
881 'youtube_include_dash_manifest': True,
882 'format': '135', # bestvideo
be49068d
S
883 },
884 'skip': 'This live event has ended.',
2ee8f5d8 885 },
cf7e015f
S
886 {
887 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 888 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 889 'info_dict': {
545cc85d 890 'id': 'jvGDaLqkpTg',
891 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
892 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
893 },
894 'playlist': [{
895 'info_dict': {
545cc85d 896 'id': 'jvGDaLqkpTg',
cf7e015f 897 'ext': 'mp4',
545cc85d 898 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
899 'description': 'md5:e03b909557865076822aa169218d6a5d',
900 'duration': 10643,
901 'upload_date': '20161111',
902 'uploader': 'Team PGP',
903 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
904 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
905 },
906 }, {
907 'info_dict': {
545cc85d 908 'id': '3AKt1R1aDnw',
cf7e015f 909 'ext': 'mp4',
545cc85d 910 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
911 'description': 'md5:e03b909557865076822aa169218d6a5d',
912 'duration': 10991,
913 'upload_date': '20161111',
914 'uploader': 'Team PGP',
915 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
916 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
917 },
918 }, {
919 'info_dict': {
545cc85d 920 'id': 'RtAMM00gpVc',
cf7e015f 921 'ext': 'mp4',
545cc85d 922 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
923 'description': 'md5:e03b909557865076822aa169218d6a5d',
924 'duration': 10995,
925 'upload_date': '20161111',
926 'uploader': 'Team PGP',
927 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
928 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
929 },
930 }, {
931 'info_dict': {
545cc85d 932 'id': '6N2fdlP3C5U',
cf7e015f 933 'ext': 'mp4',
545cc85d 934 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
935 'description': 'md5:e03b909557865076822aa169218d6a5d',
936 'duration': 10990,
937 'upload_date': '20161111',
938 'uploader': 'Team PGP',
939 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
940 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
941 },
942 }],
943 'params': {
944 'skip_download': True,
945 },
cbaed4bb 946 },
f9f49d87 947 {
067aa17e 948 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
949 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
950 'info_dict': {
951 'id': 'gVfLd0zydlo',
952 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
953 },
954 'playlist_count': 2,
be49068d 955 'skip': 'Not multifeed anymore',
f9f49d87 956 },
cbaed4bb 957 {
2d3d2997 958 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 959 'only_matching': True,
0e49d9a6 960 },
6d4fc66b 961 {
2d3d2997 962 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
963 'only_matching': True,
964 },
0e49d9a6 965 {
067aa17e 966 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 967 # Also tests cut-off URL expansion in video description (see
067aa17e
S
968 # https://github.com/ytdl-org/youtube-dl/issues/1892,
969 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
970 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
971 'info_dict': {
972 'id': 'lsguqyKfVQg',
973 'ext': 'mp4',
974 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 975 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 976 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 977 'duration': 133,
0e49d9a6
LL
978 'upload_date': '20151119',
979 'uploader_id': 'IronSoulElf',
ec85ded8 980 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 981 'uploader': 'IronSoulElf',
eb6793ba
S
982 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
983 'track': 'Dark Walk - Position Music',
984 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 985 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
986 },
987 'params': {
988 'skip_download': True,
989 },
990 },
61f92af1 991 {
067aa17e 992 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
993 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
994 'only_matching': True,
995 },
313dfc45
LL
996 {
997 # Video with yt:stretch=17:0
998 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
999 'info_dict': {
1000 'id': 'Q39EVAstoRM',
1001 'ext': 'mp4',
1002 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1003 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1004 'upload_date': '20151107',
1005 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1006 'uploader': 'CH GAMER DROID',
1007 },
1008 'params': {
1009 'skip_download': True,
1010 },
be49068d 1011 'skip': 'This video does not exist.',
313dfc45 1012 },
201c1459 1013 {
1014 # Video with incomplete 'yt:stretch=16:'
1015 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1016 'only_matching': True,
1017 },
7caf9830
S
1018 {
1019 # Video licensed under Creative Commons
1020 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1021 'info_dict': {
1022 'id': 'M4gD1WSo5mA',
1023 'ext': 'mp4',
1024 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1025 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1026 'duration': 721,
7caf9830
S
1027 'upload_date': '20150127',
1028 'uploader_id': 'BerkmanCenter',
ec85ded8 1029 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1030 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1031 'license': 'Creative Commons Attribution license (reuse allowed)',
1032 },
1033 'params': {
1034 'skip_download': True,
1035 },
1036 },
fd050249
S
1037 {
1038 # Channel-like uploader_url
1039 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1040 'info_dict': {
1041 'id': 'eQcmzGIKrzg',
1042 'ext': 'mp4',
1043 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1044 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1045 'duration': 4060,
fd050249 1046 'upload_date': '20151119',
eb6793ba 1047 'uploader': 'Bernie Sanders',
fd050249 1048 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1049 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1050 'license': 'Creative Commons Attribution license (reuse allowed)',
1051 },
1052 'params': {
1053 'skip_download': True,
1054 },
1055 },
040ac686
S
1056 {
1057 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1058 'only_matching': True,
7f29cf54
S
1059 },
1060 {
067aa17e 1061 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1062 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1063 'only_matching': True,
6496ccb4
S
1064 },
1065 {
1066 # Rental video preview
1067 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1068 'info_dict': {
1069 'id': 'uGpuVWrhIzE',
1070 'ext': 'mp4',
1071 'title': 'Piku - Trailer',
1072 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1073 'upload_date': '20150811',
1074 'uploader': 'FlixMatrix',
1075 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1076 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1077 'license': 'Standard YouTube License',
1078 },
1079 'params': {
1080 'skip_download': True,
1081 },
eb6793ba 1082 'skip': 'This video is not available.',
022a5d66 1083 },
12afdc2a
S
1084 {
1085 # YouTube Red video with episode data
1086 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1087 'info_dict': {
1088 'id': 'iqKdEhx-dD4',
1089 'ext': 'mp4',
1090 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1091 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1092 'duration': 2085,
12afdc2a
S
1093 'upload_date': '20170118',
1094 'uploader': 'Vsauce',
1095 'uploader_id': 'Vsauce',
1096 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1097 'series': 'Mind Field',
1098 'season_number': 1,
1099 'episode_number': 1,
1100 },
1101 'params': {
1102 'skip_download': True,
1103 },
1104 'expected_warnings': [
1105 'Skipping DASH manifest',
1106 ],
1107 },
c7121fa7
S
1108 {
1109 # The following content has been identified by the YouTube community
1110 # as inappropriate or offensive to some audiences.
1111 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1112 'info_dict': {
1113 'id': '6SJNVb0GnPI',
1114 'ext': 'mp4',
1115 'title': 'Race Differences in Intelligence',
1116 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1117 'duration': 965,
1118 'upload_date': '20140124',
1119 'uploader': 'New Century Foundation',
1120 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1121 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1122 },
1123 'params': {
1124 'skip_download': True,
1125 },
545cc85d 1126 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1127 },
022a5d66
S
1128 {
1129 # itag 212
1130 'url': '1t24XAntNCY',
1131 'only_matching': True,
fd5c4aab
S
1132 },
1133 {
1134 # geo restricted to JP
1135 'url': 'sJL6WA-aGkQ',
1136 'only_matching': True,
1137 },
cd5a74a2
S
1138 {
1139 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1140 'only_matching': True,
1141 },
bc2ca1bb 1142 {
1143 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1144 'only_matching': True,
1145 },
1146 {
1147 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1148 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1149 'only_matching': True,
1150 },
825cd268
RA
1151 {
1152 # DRM protected
1153 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1154 'only_matching': True,
4fe54c12
S
1155 },
1156 {
1157 # Video with unsupported adaptive stream type formats
1158 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1159 'info_dict': {
1160 'id': 'Z4Vy8R84T1U',
1161 'ext': 'mp4',
1162 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1163 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1164 'duration': 433,
1165 'upload_date': '20130923',
1166 'uploader': 'Amelia Putri Harwita',
1167 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1168 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1169 'formats': 'maxcount:10',
1170 },
1171 'params': {
1172 'skip_download': True,
1173 'youtube_include_dash_manifest': False,
1174 },
5429d6a9 1175 'skip': 'not actual anymore',
5caabd3c 1176 },
1177 {
822b9d9c 1178 # Youtube Music Auto-generated description
5caabd3c 1179 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1180 'info_dict': {
1181 'id': 'MgNrAu2pzNs',
1182 'ext': 'mp4',
1183 'title': 'Voyeur Girl',
1184 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1185 'upload_date': '20190312',
5429d6a9
S
1186 'uploader': 'Stephen - Topic',
1187 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1188 'artist': 'Stephen',
1189 'track': 'Voyeur Girl',
1190 'album': 'it\'s too much love to know my dear',
1191 'release_date': '20190313',
1192 'release_year': 2019,
1193 },
1194 'params': {
1195 'skip_download': True,
1196 },
1197 },
66b48727
RA
1198 {
1199 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1200 'only_matching': True,
1201 },
011e75e6
S
1202 {
1203 # invalid -> valid video id redirection
1204 'url': 'DJztXj2GPfl',
1205 'info_dict': {
1206 'id': 'DJztXj2GPfk',
1207 'ext': 'mp4',
1208 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1209 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1210 'upload_date': '20090125',
1211 'uploader': 'Prochorowka',
1212 'uploader_id': 'Prochorowka',
1213 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1214 'artist': 'Panjabi MC',
1215 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1216 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1217 },
1218 'params': {
1219 'skip_download': True,
1220 },
545cc85d 1221 'skip': 'Video unavailable',
ea74e00b
DP
1222 },
1223 {
1224 # empty description results in an empty string
1225 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1226 'info_dict': {
1227 'id': 'x41yOUIvK2k',
1228 'ext': 'mp4',
1229 'title': 'IMG 3456',
1230 'description': '',
1231 'upload_date': '20170613',
1232 'uploader_id': 'ElevageOrVert',
1233 'uploader': 'ElevageOrVert',
1234 },
1235 'params': {
1236 'skip_download': True,
1237 },
1238 },
a0566bbf 1239 {
29f7c58a 1240 # with '};' inside yt initial data (see [1])
1241 # see [2] for an example with '};' inside ytInitialPlayerResponse
1242 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1243 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1244 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1245 'info_dict': {
1246 'id': 'CHqg6qOn4no',
1247 'ext': 'mp4',
1248 'title': 'Part 77 Sort a list of simple types in c#',
1249 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1250 'upload_date': '20130831',
1251 'uploader_id': 'kudvenkat',
1252 'uploader': 'kudvenkat',
1253 },
1254 'params': {
1255 'skip_download': True,
1256 },
1257 },
29f7c58a 1258 {
1259 # another example of '};' in ytInitialData
1260 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1261 'only_matching': True,
1262 },
1263 {
1264 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1265 'only_matching': True,
1266 },
{
    # https://github.com/ytdl-org/youtube-dl/pull/28094
    'url': 'OtqTfy26tG0',
    'info_dict': {
        'id': 'OtqTfy26tG0',
        'ext': 'mp4',
        'title': 'Burn Out',
        'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
        'upload_date': '20141120',
        'uploader': 'The Cinematic Orchestra - Topic',
        'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
        'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
        'artist': 'The Cinematic Orchestra',
        'track': 'Burn Out',
        'album': 'Every Day',
        # Key was misspelled 'release_data', which made this expectation
        # a no-op; the field is spelled 'release_date' elsewhere in _TESTS
        'release_date': None,
        'release_year': None,
    },
    'params': {
        'skip_download': True,
    },
},
bc2ca1bb 1289 {
1290 # controversial video, only works with bpctr when authenticated with cookies
1291 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1292 'only_matching': True,
1293 },
f7ad7160 1294 {
1295 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1296 'url': 'cBvYw8_A0vQ',
1297 'info_dict': {
1298 'id': 'cBvYw8_A0vQ',
1299 'ext': 'mp4',
1300 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1301 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1302 'upload_date': '20201120',
1303 'uploader': 'Walk around Japan',
1304 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1305 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1306 },
1307 'params': {
1308 'skip_download': True,
1309 },
0fb983f6 1310 }, {
1311 # Has multiple audio streams
1312 'url': 'WaOKSUlf4TM',
1313 'only_matching': True
9297939e 1314 }, {
1315 # Requires Premium: has format 141 when requested using YTM url
1316 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1317 'only_matching': True
1318 }, {
120916da 1319 # multiple subtitles with same lang_code
1320 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1321 'only_matching': True,
1322 },
2eb88d95
PH
1323 ]
1324
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle url.

    URLs whose query string carries a non-empty ``list`` parameter are
    rejected; every other URL is judged by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs

    list_param = parse_qs(url).get('list', [None])[0]
    if not list_param:
        return super(YoutubeIE, cls).suitable(url)
    return False
1334
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent and start with empty caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player code
    # _player_cache: (player URL, signature cache id) -> signature function
    self._code_cache, self._player_cache = {}, {}
e0df6211 1339
60064c53
PH
1340 def _signature_cache_id(self, example_sig):
1341 """ Return a string representation of a signature """
78caa52a 1342 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 1343
e40c758c
S
1344 @classmethod
1345 def _extract_player_info(cls, player_url):
1346 for player_re in cls._PLAYER_INFO_RE:
1347 id_m = re.search(player_re, player_url)
1348 if id_m:
1349 break
1350 else:
c081b35c 1351 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1352 return id_m.group('id')
e40c758c
S
1353
1354 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1355 player_id = self._extract_player_info(player_url)
e0df6211 1356
c4417ddb 1357 # Read from filesystem cache
545cc85d 1358 func_id = 'js_%s_%s' % (
1359 player_id, self._signature_cache_id(example_sig))
c4417ddb 1360 assert os.path.basename(func_id) == func_id
a0e07d31 1361
69ea8ca4 1362 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1363 if cache_spec is not None:
78caa52a 1364 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1365
545cc85d 1366 if player_id not in self._code_cache:
1367 self._code_cache[player_id] = self._download_webpage(
e0df6211 1368 player_url, video_id,
545cc85d 1369 note='Downloading player ' + player_id,
69ea8ca4 1370 errnote='Download of %s failed' % player_url)
545cc85d 1371 code = self._code_cache[player_id]
1372 res = self._parse_sig_js(code)
e0df6211 1373
785521bf
PH
1374 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1375 cache_res = res(test_string)
1376 cache_spec = [ord(c) for c in cache_res]
83799698 1377
69ea8ca4 1378 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1379 return res
1380
60064c53 1381 def _print_sig_code(self, func, example_sig):
edf3e38e
PH
1382 def gen_sig_code(idxs):
1383 def _genslice(start, end, step):
78caa52a 1384 starts = '' if start == 0 else str(start)
8bcc8756 1385 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 1386 steps = '' if step == 1 else (':%d' % step)
78caa52a 1387 return 's[%s%s%s]' % (starts, ends, steps)
edf3e38e
PH
1388
1389 step = None
7af808a5
PH
1390 # Quelch pyflakes warnings - start will be set when step is set
1391 start = '(Never used)'
edf3e38e
PH
1392 for i, prev in zip(idxs[1:], idxs[:-1]):
1393 if step is not None:
1394 if i - prev == step:
1395 continue
1396 yield _genslice(start, prev, step)
1397 step = None
1398 continue
1399 if i - prev in [-1, 1]:
1400 step = i - prev
1401 start = prev
1402 continue
1403 else:
78caa52a 1404 yield 's[%d]' % prev
edf3e38e 1405 if step is None:
78caa52a 1406 yield 's[%d]' % i
edf3e38e
PH
1407 else:
1408 yield _genslice(start, i, step)
1409
78caa52a 1410 test_string = ''.join(map(compat_chr, range(len(example_sig))))
c705320f 1411 cache_res = func(test_string)
edf3e38e 1412 cache_spec = [ord(c) for c in cache_res]
78caa52a 1413 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53
PH
1414 signature_id_tuple = '(%s)' % (
1415 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
69ea8ca4 1416 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 1417 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 1418 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1419
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a Python callable for the signature function found in jscode.

    The function name is located with the first matching pattern below,
    the function itself is extracted with JSInterpreter, and the returned
    callable passes its single string argument on as a one-element
    argument list.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # This pattern used to be listed twice in a row; the second copy
         # could never match anything the first one had not
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])
1443
545cc85d 1444 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1445 """Turn the encrypted s field into a working signature"""
6b37f0be 1446
c8bf86d5 1447 if player_url is None:
69ea8ca4 1448 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1449
69ea8ca4 1450 if player_url.startswith('//'):
78caa52a 1451 player_url = 'https:' + player_url
3c90cc8b
S
1452 elif not re.match(r'https?://', player_url):
1453 player_url = compat_urlparse.urljoin(
1454 'https://www.youtube.com', player_url)
c8bf86d5 1455 try:
62af3a0e 1456 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1457 if player_id not in self._player_cache:
1458 func = self._extract_signature_function(
60064c53 1459 video_id, player_url, s
c8bf86d5
PH
1460 )
1461 self._player_cache[player_id] = func
1462 func = self._player_cache[player_id]
a06916d9 1463 if self.get_param('youtube_print_sig_code'):
60064c53 1464 self._print_sig_code(func, s)
c8bf86d5
PH
1465 return func(s)
1466 except Exception as e:
1467 tb = traceback.format_exc()
1468 raise ExtractorError(
78caa52a 1469 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1470
def _mark_watched(self, video_id, player_response):
    """Request the playback tracking URL so the video counts as watched.

    The URL is read from
    player_response['playbackTracking']['videostatsPlaybackUrl']['baseUrl'];
    nothing happens when it is missing or not a URL. The request is
    non-fatal: a failure does not abort extraction.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # random.choice picks every character with equal probability; the
    # former randint(0, 256) & 63 was inclusive of 256 and therefore
    # slightly favoured index 0
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1495
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds referenced by an arbitrary webpage.

    Returns a list of strings.  Entries coming from iframe/embed/object/SWF
    style players are (HTML-unescaped) embed URLs; entries coming from the
    lazyYT and Wordpress "YouTube Video Importer" markup are the raw values
    of their data-youtube-id / data-video_id attributes.
    """
    # Embedded YouTube player.
    # The `embedSWF\(\s*` alternative matches `swfobject.embedSWF("…")` calls;
    # it used to read `embedSWF\(?:\s*`, which required a literal ':' after
    # the name and therefore never matched such calls.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields (q1, q2, video_id) tuples; only the last group is wanted.
    entries.extend(m[-1] for m in matches)

    return entries
1527
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by YoutubeIE._extract_urls, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1532
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second capture group of cls._VALID_URL matched against url.

    Raises ExtractorError when url does not match the pattern.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1540
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a chapter list from the chaptered player bar in the initial data.

    Each chapter runs from its own timeRangeStartMillis (converted to
    seconds) to the start of the next listed chapter; the last one ends at
    `duration`.  Chapters lacking a start or an end are left out.  Returns
    None when the player bar has no chapter list.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_seconds(item):
        # Start offset of a chapter entry, milliseconds -> seconds.
        millis = try_get(
            item,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for index, item in enumerate(raw_chapters):
        begin = start_seconds(item)
        if begin is None:
            continue
        following = index + 1
        if following < total:
            finish = start_seconds(raw_chapters[following])
        else:
            finish = duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                item, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1580
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find a JSON blob in webpage with `regex` and parse it non-fatally.

    The pattern anchored by self._YT_INITIAL_BOUNDARY_RE is tried first and
    the bare `regex` second; '{}' is parsed when neither matches.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1585
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the result of datetime_from_str for 'now-<X><units>', or None
    when time_text has fewer than three space-separated words.
    """
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    amount, unit = words[0], words[1]
    return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
1595
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the non-empty 'text' strings of the dict entries in runs.

    Entries that are not dicts, or whose 'text' is missing/empty, are
    ignored.  Returns None when nothing was collected.
    """
    pieces = []
    for run in runs:
        if isinstance(run, dict):
            piece = try_get(run, lambda x: x['text'], compat_str)
            if piece:
                pieces.append(piece)
    return ''.join(pieces) if pieces else None
1609
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a commentRenderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'.  `parent` is the id
    of the comment being replied to; top-level comments get 'root'.
    'timestamp' is derived from the relative "X units ago" text and is None
    when that text is missing or cannot be split into amount and unit.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)
    # time_text is None when publishedTimeText is absent, and parse_time_text
    # returns None for texts with fewer than three words; either case used to
    # raise AttributeError here and abort the whole comment extraction.
    published = self.parse_time_text(time_text) if time_text else None
    timestamp = calendar.timegm(published.timetuple()) if published is not None else None
    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                  lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_liked,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
1642
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generator over the comments reachable from root_continuation_data.

    Yields comment dicts as produced by _extract_comment.  On the root call
    (parent is None) it may additionally yield one int: the estimated total
    number of comments read from the comments header.

    root_continuation_data: renderer dict from which the first continuation
        is extracted.
    session_token_list: one-element list holding the xsrf token; element 0
        is overwritten whenever a response carries a new 'xsrf_token', so the
        update is visible to the recursive calls that share the list.
    parent: id of the comment whose replies are being fetched, or None for
        the top-level comment section.
    comment_counts: shared 3-item list
        [comments so far, est. total comments, current comment thread #];
        created here when not supplied.

    Raises ExtractorError when a page cannot be fetched after the configured
    retries or when the response signals an error.
    """

    def extract_thread(parent_renderer):
        # Yields every comment in parent_renderer['contents'] and, for
        # comments that have a replies renderer, recursively all replies.
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            # Top-level page: restart the reply-thread counter.
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # NOTE(review): `dict` is placed inside the getter tuple instead of
            # being passed as the expected type - confirm this is intended.
            comment_renderer = try_get(
                comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                content, (lambda x: x['commentRenderer'], dict))

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    # Only the root call handles the comments header (count + sort menu).
    first_continuation = False
    if parent is None:
        first_continuation = True

    # One iteration per continuation page; ends when no continuation is left.
    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self.get_param('extractor_retries', 3)
        count = -1
        last_error = None

        # Retry loop: `count` is the number of the current attempt (0 = first
        # try), so up to retries + 1 requests are made for one page.
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                # Progress note shown for this request.
                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        # Treated as "no more comments": stop the generator.
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                # Any other error, or retries exhausted: propagate.
                raise
            else:
                # Keep the xsrf token fresh for subsequent requests.
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                # The payload is either a dict or a list whose second item
                # carries the response.
                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        # Prefer the visitor data returned by the server for the next request.
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        # NOTE(review): if none of the keys is a known renderer, `continuation`
        # is left unchanged and the same page appears to be requested again -
        # confirm this cannot happen in practice.
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                    # Hand the estimate to the caller as a bare int.
                    yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    # Restart from the sorted listing; the comments of this
                    # first page are not yielded.
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            # Only the first known renderer of a response is processed.
            break
1827
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
    """Entry for comment extraction

    Walks `contents`, feeds every supported renderer to _comment_entries and
    gathers the yielded comments.  Integers yielded by _comment_entries are
    taken as the estimated total and only used for the final log line.
    Returns a dict with 'comments' and 'comment_count'.
    """
    supported_renderers = (
        'itemSectionRenderer',
    )
    collected = []
    approx_total = 0
    for entry in contents:
        for renderer_name, renderer in entry.items():
            if renderer_name not in supported_renderers:
                continue

            comment_stream = self._comment_entries(
                renderer,
                identity_token=self._extract_identity_token(webpage, item_id=video_id),
                account_syncid=self._extract_account_syncid(ytcfg),
                ytcfg=ytcfg,
                session_token_list=[xsrf_token])

            for item in comment_stream:
                if isinstance(item, int):
                    approx_total = item
                else:
                    collected.append(item)
            # Only the first supported renderer of an entry is processed.
            break
    self.to_screen('Downloaded %d/%d comments' % (len(collected), approx_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
1858
c5e8d7af 1859 def _real_extract(self, url):
cf7e015f 1860 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1861 video_id = self._match_id(url)
9297939e 1862
1863 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
1864
545cc85d 1865 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1866 webpage_url = base_url + 'watch?v=' + video_id
1867 webpage = self._download_webpage(
cce889b9 1868 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1869
9297939e 1870 def get_text(x):
1871 if not x:
1872 return
1873 text = x.get('simpleText')
1874 if text and isinstance(text, compat_str):
1875 return text
1876 runs = x.get('runs')
1877 if not isinstance(runs, list):
1878 return
1879 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
1880
1881 ytm_streaming_data = {}
1882 if is_music_url:
1883 # we are forcing to use parse_json because 141 only appeared in get_video_info.
1884 # el, c, cver, cplayer field required for 141(aac 256kbps) codec
1885 # maybe paramter of youtube music player?
1886 ytm_player_response = self._parse_json(try_get(compat_parse_qs(
1887 self._download_webpage(
1888 base_url + 'get_video_info', video_id,
fe03a6cd 1889 'Fetching youtube music info webpage',
1890 'unable to download youtube music info webpage', query={
9297939e 1891 'video_id': video_id,
1892 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1893 'el': 'detailpage',
1894 'c': 'WEB_REMIX',
1895 'cver': '0.1',
1896 'cplayer': 'UNIPLAYER'
1897 }, fatal=False)),
1898 lambda x: x['player_response'][0],
1899 compat_str) or '{}', video_id)
1900 ytm_streaming_data = ytm_player_response.get('streamingData') or {}
1901
545cc85d 1902 player_response = None
1903 if webpage:
1904 player_response = self._extract_yt_initial_variable(
1905 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1906 video_id, 'initial player response')
f4f751af 1907
1908 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1909 if not player_response:
1910 player_response = self._call_api(
f4f751af 1911 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1912
1913 playability_status = player_response.get('playabilityStatus') or {}
1914 if playability_status.get('reason') == 'Sign in to confirm your age':
1915 pr = self._parse_json(try_get(compat_parse_qs(
1916 self._download_webpage(
1917 base_url + 'get_video_info', video_id,
1918 'Refetching age-gated info webpage',
1919 'unable to download video info webpage', query={
1920 'video_id': video_id,
7c60c33e 1921 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1922 }, fatal=False)),
1923 lambda x: x['player_response'][0],
1924 compat_str) or '{}', video_id)
1925 if pr:
1926 player_response = pr
1927
1928 trailer_video_id = try_get(
1929 playability_status,
1930 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1931 compat_str)
1932 if trailer_video_id:
1933 return self.url_result(
1934 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1935
545cc85d 1936 search_meta = (
1937 lambda x: self._html_search_meta(x, webpage, default=None)) \
1938 if webpage else lambda x: None
dbdaaa23 1939
545cc85d 1940 video_details = player_response.get('videoDetails') or {}
37357d21 1941 microformat = try_get(
545cc85d 1942 player_response,
1943 lambda x: x['microformat']['playerMicroformatRenderer'],
1944 dict) or {}
1945 video_title = video_details.get('title') \
1946 or get_text(microformat.get('title')) \
1947 or search_meta(['og:title', 'twitter:title', 'title'])
1948 video_description = video_details.get('shortDescription')
cf7e015f 1949
8fe10494 1950 if not smuggled_data.get('force_singlefeed', False):
a06916d9 1951 if not self.get_param('noplaylist'):
8fe10494
S
1952 multifeed_metadata_list = try_get(
1953 player_response,
1954 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1955 compat_str)
8fe10494
S
1956 if multifeed_metadata_list:
1957 entries = []
1958 feed_ids = []
1959 for feed in multifeed_metadata_list.split(','):
1960 # Unquote should take place before split on comma (,) since textual
1961 # fields may contain comma as well (see
067aa17e 1962 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1963 feed_data = compat_parse_qs(
1964 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1965
1966 def feed_entry(name):
545cc85d 1967 return try_get(
1968 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1969
1970 feed_id = feed_entry('id')
1971 if not feed_id:
1972 continue
1973 feed_title = feed_entry('title')
1974 title = video_title
1975 if feed_title:
1976 title += ' (%s)' % feed_title
8fe10494
S
1977 entries.append({
1978 '_type': 'url_transparent',
1979 'ie_key': 'Youtube',
1980 'url': smuggle_url(
545cc85d 1981 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1982 {'force_singlefeed': True}),
6b09401b 1983 'title': title,
8fe10494 1984 })
6b09401b 1985 feed_ids.append(feed_id)
8fe10494
S
1986 self.to_screen(
1987 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1988 % (', '.join(feed_ids), video_id))
545cc85d 1989 return self.playlist_result(
1990 entries, video_id, video_title, video_description)
8fe10494
S
1991 else:
1992 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1993
9297939e 1994 formats, itags, stream_ids = [], [], []
cc2db878 1995 itag_qualities = {}
545cc85d 1996 player_url = None
dca3ff4a 1997 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
9297939e 1998
545cc85d 1999 streaming_data = player_response.get('streamingData') or {}
2000 streaming_formats = streaming_data.get('formats') or []
2001 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2002 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2003 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2004
545cc85d 2005 for fmt in streaming_formats:
2006 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2007 continue
321bf820 2008
cc2db878 2009 itag = str_or_none(fmt.get('itag'))
9297939e 2010 audio_track = fmt.get('audioTrack') or {}
2011 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2012 if stream_id in stream_ids:
2013 continue
2014
cc2db878 2015 quality = fmt.get('quality')
2016 if itag and quality:
2017 itag_qualities[itag] = quality
2018 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2019 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2020 # number of fragment that would subsequently requested with (`&sq=N`)
2021 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2022 continue
2023
545cc85d 2024 fmt_url = fmt.get('url')
2025 if not fmt_url:
2026 sc = compat_parse_qs(fmt.get('signatureCipher'))
2027 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2028 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2029 if not (sc and fmt_url and encrypted_sig):
2030 continue
2031 if not player_url:
2032 if not webpage:
2033 continue
2034 player_url = self._search_regex(
2035 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
2036 webpage, 'player URL', fatal=False)
2037 if not player_url:
201e9eaa 2038 continue
545cc85d 2039 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2040 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2041 fmt_url += '&' + sp + '=' + signature
2042
545cc85d 2043 if itag:
2044 itags.append(itag)
9297939e 2045 stream_ids.append(stream_id)
2046
cc2db878 2047 tbr = float_or_none(
2048 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2049 dct = {
2050 'asr': int_or_none(fmt.get('audioSampleRate')),
2051 'filesize': int_or_none(fmt.get('contentLength')),
2052 'format_id': itag,
0fb983f6 2053 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2054 'fps': int_or_none(fmt.get('fps')),
2055 'height': int_or_none(fmt.get('height')),
dca3ff4a 2056 'quality': q(quality),
cc2db878 2057 'tbr': tbr,
545cc85d 2058 'url': fmt_url,
2059 'width': fmt.get('width'),
0fb983f6 2060 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2061 }
2062 mimetype = fmt.get('mimeType')
2063 if mimetype:
2064 mobj = re.match(
2065 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2066 if mobj:
2067 dct['ext'] = mimetype2ext(mobj.group(1))
2068 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2069 no_audio = dct.get('acodec') == 'none'
2070 no_video = dct.get('vcodec') == 'none'
2071 if no_audio:
2072 dct['vbr'] = tbr
2073 if no_video:
2074 dct['abr'] = tbr
2075 if no_audio or no_video:
545cc85d 2076 dct['downloader_options'] = {
2077 # Youtube throttles chunks >~10M
2078 'http_chunk_size': 10485760,
bf1317d2 2079 }
7c60c33e 2080 if dct.get('ext'):
2081 dct['container'] = dct['ext'] + '_dash'
545cc85d 2082 formats.append(dct)
2083
9297939e 2084 for sd in (streaming_data, ytm_streaming_data):
2085 hls_manifest_url = sd.get('hlsManifestUrl')
2086 if hls_manifest_url:
2087 for f in self._extract_m3u8_formats(
2088 hls_manifest_url, video_id, 'mp4', fatal=False):
2089 itag = self._search_regex(
2090 r'/itag/(\d+)', f['url'], 'itag', default=None)
2091 if itag:
2092 f['format_id'] = itag
545cc85d 2093 formats.append(f)
2094
a06916d9 2095 if self.get_param('youtube_include_dash_manifest', True):
9297939e 2096 for sd in (streaming_data, ytm_streaming_data):
2097 dash_manifest_url = sd.get('dashManifestUrl')
2098 if dash_manifest_url:
2099 for f in self._extract_mpd_formats(
2100 dash_manifest_url, video_id, fatal=False):
2101 itag = f['format_id']
2102 if itag in itags:
2103 continue
2104 if itag in itag_qualities:
2105 # Not actually usefull since the sorting is already done with "quality,res,fps,codec"
2106 # but kept to maintain feature parity (and code similarity) with youtube-dl
2107 # Remove if this causes any issues with sorting in future
2108 f['quality'] = q(itag_qualities[itag])
2109 filesize = int_or_none(self._search_regex(
2110 r'/clen/(\d+)', f.get('fragment_base_url')
2111 or f['url'], 'file size', default=None))
2112 if filesize:
2113 f['filesize'] = filesize
2114 formats.append(f)
bf1317d2 2115
545cc85d 2116 if not formats:
a06916d9 2117 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2118 self.raise_no_formats(
545cc85d 2119 'This video is DRM protected.', expected=True)
2120 pemr = try_get(
2121 playability_status,
2122 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2123 dict) or {}
2124 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2125 subreason = pemr.get('subreason')
2126 if subreason:
2127 subreason = clean_html(get_text(subreason))
2128 if subreason == 'The uploader has not made this video available in your country.':
2129 countries = microformat.get('availableCountries')
2130 if not countries:
2131 regions_allowed = search_meta('regionsAllowed')
2132 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2133 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2134 reason += '\n' + subreason
2135 if reason:
b7da73eb 2136 self.raise_no_formats(reason, expected=True)
bf1317d2 2137
545cc85d 2138 self._sort_formats(formats)
bf1317d2 2139
545cc85d 2140 keywords = video_details.get('keywords') or []
2141 if not keywords and webpage:
2142 keywords = [
2143 unescapeHTML(m.group('content'))
2144 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2145 for keyword in keywords:
2146 if keyword.startswith('yt:stretch='):
201c1459 2147 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2148 if mobj:
2149 # NB: float is intentional for forcing float division
2150 w, h = (float(v) for v in mobj.groups())
2151 if w > 0 and h > 0:
2152 ratio = w / h
2153 for f in formats:
2154 if f.get('vcodec') != 'none':
2155 f['stretched_ratio'] = ratio
2156 break
6449cd80 2157
545cc85d 2158 thumbnails = []
2159 for container in (video_details, microformat):
2160 for thumbnail in (try_get(
2161 container,
2162 lambda x: x['thumbnail']['thumbnails'], list) or []):
2163 thumbnail_url = thumbnail.get('url')
2164 if not thumbnail_url:
bf1317d2 2165 continue
1988fab7 2166 # Sometimes youtube gives a wrong thumbnail URL. See:
2167 # https://github.com/yt-dlp/yt-dlp/issues/233
2168 # https://github.com/ytdl-org/youtube-dl/issues/28023
2169 if 'maxresdefault' in thumbnail_url:
2170 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2171 thumbnails.append({
545cc85d 2172 'url': thumbnail_url,
ff2751ac 2173 'height': int_or_none(thumbnail.get('height')),
545cc85d 2174 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2175 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2176 })
ff2751ac 2177 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2178 if thumbnail_url:
2179 thumbnails.append({
2180 'url': thumbnail_url,
2181 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2182 })
2183 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2184 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2185 thumbnails.append({
2186 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2187 'preference': 1,
2188 })
2189 self._remove_duplicate_formats(thumbnails)
545cc85d 2190
2191 category = microformat.get('category') or search_meta('genre')
2192 channel_id = video_details.get('channelId') \
2193 or microformat.get('externalChannelId') \
2194 or search_meta('channelId')
2195 duration = int_or_none(
2196 video_details.get('lengthSeconds')
2197 or microformat.get('lengthSeconds')) \
2198 or parse_duration(search_meta('duration'))
2199 is_live = video_details.get('isLive')
2200 owner_profile_url = microformat.get('ownerProfileUrl')
2201
2202 info = {
2203 'id': video_id,
2204 'title': self._live_title(video_title) if is_live else video_title,
2205 'formats': formats,
2206 'thumbnails': thumbnails,
2207 'description': video_description,
2208 'upload_date': unified_strdate(
2209 microformat.get('uploadDate')
2210 or search_meta('uploadDate')),
2211 'uploader': video_details['author'],
2212 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2213 'uploader_url': owner_profile_url,
2214 'channel_id': channel_id,
2215 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2216 'duration': duration,
2217 'view_count': int_or_none(
2218 video_details.get('viewCount')
2219 or microformat.get('viewCount')
2220 or search_meta('interactionCount')),
2221 'average_rating': float_or_none(video_details.get('averageRating')),
2222 'age_limit': 18 if (
2223 microformat.get('isFamilySafe') is False
2224 or search_meta('isFamilyFriendly') == 'false'
2225 or search_meta('og:restrictions:age') == '18+') else 0,
2226 'webpage_url': webpage_url,
2227 'categories': [category] if category else None,
2228 'tags': keywords,
2229 'is_live': is_live,
2230 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2231 'was_live': video_details.get('isLiveContent'),
545cc85d 2232 }
b477fc13 2233
545cc85d 2234 pctr = try_get(
2235 player_response,
2236 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2237 subtitles = {}
2238 if pctr:
774d79cc 2239 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2240 lang_subs = container.setdefault(lang_code, [])
545cc85d 2241 for fmt in self._SUBTITLE_FORMATS:
2242 query.update({
2243 'fmt': fmt,
2244 })
2245 lang_subs.append({
2246 'ext': fmt,
2247 'url': update_url_query(base_url, query),
774d79cc 2248 'name': sub_name,
545cc85d 2249 })
7e72694b 2250
545cc85d 2251 for caption_track in (pctr.get('captionTracks') or []):
2252 base_url = caption_track.get('baseUrl')
2253 if not base_url:
2254 continue
2255 if caption_track.get('kind') != 'asr':
120916da 2256 lang_code = (
2257 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2258 or caption_track.get('languageCode'))
545cc85d 2259 if not lang_code:
2260 continue
2261 process_language(
774d79cc 2262 subtitles, base_url, lang_code,
2263 try_get(caption_track, lambda x: x.get('name').get('simpleText')),
2264 {})
545cc85d 2265 continue
2266 automatic_captions = {}
2267 for translation_language in (pctr.get('translationLanguages') or []):
2268 translation_language_code = translation_language.get('languageCode')
2269 if not translation_language_code:
2270 continue
2271 process_language(
2272 automatic_captions, base_url, translation_language_code,
774d79cc 2273 try_get(translation_language, lambda x: x['languageName']['simpleText']),
545cc85d 2274 {'tlang': translation_language_code})
2275 info['automatic_captions'] = automatic_captions
2276 info['subtitles'] = subtitles
7e72694b 2277
545cc85d 2278 parsed_url = compat_urllib_parse_urlparse(url)
2279 for component in [parsed_url.fragment, parsed_url.query]:
2280 query = compat_parse_qs(component)
2281 for k, v in query.items():
2282 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2283 d_k += '_time'
2284 if d_k not in info and k in s_ks:
2285 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2286
2287 # Youtube Music Auto-generated description
822b9d9c 2288 if video_description:
38d70284 2289 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2290 if mobj:
822b9d9c
RA
2291 release_year = mobj.group('release_year')
2292 release_date = mobj.group('release_date')
2293 if release_date:
2294 release_date = release_date.replace('-', '')
2295 if not release_year:
545cc85d 2296 release_year = release_date[:4]
2297 info.update({
2298 'album': mobj.group('album'.strip()),
2299 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2300 'track': mobj.group('track').strip(),
2301 'release_date': release_date,
cc2db878 2302 'release_year': int_or_none(release_year),
545cc85d 2303 })
7e72694b 2304
545cc85d 2305 initial_data = None
2306 if webpage:
2307 initial_data = self._extract_yt_initial_variable(
2308 webpage, self._YT_INITIAL_DATA_RE, video_id,
2309 'yt initial data')
2310 if not initial_data:
2311 initial_data = self._call_api(
f4f751af 2312 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2313
2314 if not is_live:
2315 try:
2316 # This will error if there is no livechat
2317 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2318 info['subtitles']['live_chat'] = [{
394dcd44 2319 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2320 'video_id': video_id,
2321 'ext': 'json',
2322 'protocol': 'youtube_live_chat_replay',
2323 }]
2324 except (KeyError, IndexError, TypeError):
2325 pass
2326
2327 if initial_data:
2328 chapters = self._extract_chapters_from_json(
2329 initial_data, video_id, duration)
2330 if not chapters:
2331 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2332 contents = try_get(
2333 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2334 list)
2335 if not contents:
2336 continue
2337
2338 def chapter_time(mmlir):
2339 return parse_duration(
2340 get_text(mmlir.get('timeDescription')))
2341
2342 chapters = []
2343 for next_num, content in enumerate(contents, start=1):
2344 mmlir = content.get('macroMarkersListItemRenderer') or {}
2345 start_time = chapter_time(mmlir)
2346 end_time = chapter_time(try_get(
2347 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2348 if next_num < len(contents) else duration
2349 if start_time is None or end_time is None:
2350 continue
2351 chapters.append({
2352 'start_time': start_time,
2353 'end_time': end_time,
2354 'title': get_text(mmlir.get('title')),
2355 })
2356 if chapters:
2357 break
2358 if chapters:
2359 info['chapters'] = chapters
2360
2361 contents = try_get(
2362 initial_data,
2363 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2364 list) or []
2365 for content in contents:
2366 vpir = content.get('videoPrimaryInfoRenderer')
2367 if vpir:
2368 stl = vpir.get('superTitleLink')
2369 if stl:
2370 stl = get_text(stl)
2371 if try_get(
2372 vpir,
2373 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2374 info['location'] = stl
2375 else:
2376 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2377 if mobj:
2378 info.update({
2379 'series': mobj.group(1),
2380 'season_number': int(mobj.group(2)),
2381 'episode_number': int(mobj.group(3)),
2382 })
2383 for tlb in (try_get(
2384 vpir,
2385 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2386 list) or []):
2387 tbr = tlb.get('toggleButtonRenderer') or {}
2388 for getter, regex in [(
2389 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2390 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2391 lambda x: x['accessibility'],
2392 lambda x: x['accessibilityData']['accessibilityData'],
2393 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2394 label = (try_get(tbr, getter, dict) or {}).get('label')
2395 if label:
2396 mobj = re.match(regex, label)
2397 if mobj:
2398 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2399 break
2400 sbr_tooltip = try_get(
2401 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2402 if sbr_tooltip:
2403 like_count, dislike_count = sbr_tooltip.split(' / ')
2404 info.update({
2405 'like_count': str_to_int(like_count),
2406 'dislike_count': str_to_int(dislike_count),
2407 })
2408 vsir = content.get('videoSecondaryInfoRenderer')
2409 if vsir:
2410 info['channel'] = get_text(try_get(
2411 vsir,
2412 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2413 dict))
545cc85d 2414 rows = try_get(
2415 vsir,
2416 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2417 list) or []
2418 multiple_songs = False
2419 for row in rows:
2420 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2421 multiple_songs = True
2422 break
2423 for row in rows:
2424 mrr = row.get('metadataRowRenderer') or {}
2425 mrr_title = mrr.get('title')
2426 if not mrr_title:
2427 continue
2428 mrr_title = get_text(mrr['title'])
2429 mrr_contents_text = get_text(mrr['contents'][0])
2430 if mrr_title == 'License':
2431 info['license'] = mrr_contents_text
2432 elif not multiple_songs:
2433 if mrr_title == 'Album':
2434 info['album'] = mrr_contents_text
2435 elif mrr_title == 'Artist':
2436 info['artist'] = mrr_contents_text
2437 elif mrr_title == 'Song':
2438 info['track'] = mrr_contents_text
2439
2440 fallbacks = {
2441 'channel': 'uploader',
2442 'channel_id': 'uploader_id',
2443 'channel_url': 'uploader_url',
2444 }
2445 for to, frm in fallbacks.items():
2446 if not info.get(to):
2447 info[to] = info.get(frm)
2448
2449 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2450 v = info.get(s_k)
2451 if v:
2452 info[d_k] = v
b84071c0 2453
c224251a
M
2454 is_private = bool_or_none(video_details.get('isPrivate'))
2455 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2456 is_membersonly = None
b28f8d24 2457 is_premium = None
c224251a
M
2458 if initial_data and is_private is not None:
2459 is_membersonly = False
b28f8d24 2460 is_premium = False
c224251a
M
2461 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2462 for content in contents or []:
2463 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2464 for badge in badges or []:
2465 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2466 if label.lower() == 'members only':
2467 is_membersonly = True
2468 break
b28f8d24
M
2469 elif label.lower() == 'premium':
2470 is_premium = True
2471 break
2472 if is_membersonly or is_premium:
c224251a
M
2473 break
2474
2475 # TODO: Add this for playlists
2476 info['availability'] = self._availability(
2477 is_private=is_private,
b28f8d24 2478 needs_premium=is_premium,
c224251a
M
2479 needs_subscription=is_membersonly,
2480 needs_auth=info['age_limit'] >= 18,
2481 is_unlisted=None if is_private is None else is_unlisted)
2482
06167fbb 2483 # get xsrf for annotations or comments
a06916d9 2484 get_annotations = self.get_param('writeannotations', False)
2485 get_comments = self.get_param('getcomments', False)
06167fbb 2486 if get_annotations or get_comments:
29f7c58a 2487 xsrf_token = None
545cc85d 2488 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2489 if ytcfg:
2490 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2491 if not xsrf_token:
2492 xsrf_token = self._search_regex(
2493 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2494 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2495
2496 # annotations
06167fbb 2497 if get_annotations:
64b6a4e9
RA
2498 invideo_url = try_get(
2499 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2500 if xsrf_token and invideo_url:
29f7c58a 2501 xsrf_field_name = None
2502 if ytcfg:
2503 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2504 if not xsrf_field_name:
2505 xsrf_field_name = self._search_regex(
2506 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2507 webpage, 'xsrf field name',
29f7c58a 2508 group='xsrf_field_name', default='session_token')
8a784c74 2509 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2510 self._proto_relative_url(invideo_url),
2511 video_id, note='Downloading annotations',
2512 errnote='Unable to download video annotations', fatal=False,
2513 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2514
277d6ff5 2515 if get_comments:
a1c5d2ca 2516 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2517
545cc85d 2518 self.mark_watched(video_id, player_response)
d77ab8e2 2519
545cc85d 2520 return info
c5e8d7af 2521
5f6a1245 2522
8bdd16b4 2523class YoutubeTabIE(YoutubeBaseInfoExtractor):
2524 IE_DESC = 'YouTube.com tab'
70d5c17b 2525 _VALID_URL = r'''(?x)
2526 https?://
2527 (?:\w+\.)?
2528 (?:
2529 youtube(?:kids)?\.com|
2530 invidio\.us
2531 )/
2532 (?:
fe03a6cd 2533 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 2534 (?P<not_channel>
9ba5705a 2535 feed/|hashtag/|
70d5c17b 2536 (?:playlist|watch)\?.*?\blist=
2537 )|
29f7c58a 2538 (?!(?:%s)\b) # Direct URLs
70d5c17b 2539 )
2540 (?P<id>[^/?\#&]+)
2541 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2542 IE_NAME = 'youtube:tab'
2543
81127aa5 2544 _TESTS = [{
da692b79 2545 'note': 'playlists, multipage',
8bdd16b4 2546 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2547 'playlist_mincount': 94,
2548 'info_dict': {
2549 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2550 'title': 'Игорь Клейнер - Playlists',
2551 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2552 'uploader': 'Игорь Клейнер',
2553 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2554 },
2555 }, {
da692b79 2556 'note': 'playlists, multipage, different order',
8bdd16b4 2557 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2558 'playlist_mincount': 94,
2559 'info_dict': {
2560 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2561 'title': 'Игорь Клейнер - Playlists',
2562 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2563 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2564 'uploader': 'Игорь Клейнер',
8bdd16b4 2565 },
201c1459 2566 }, {
da692b79 2567 'note': 'playlists, series',
201c1459 2568 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2569 'playlist_mincount': 5,
2570 'info_dict': {
2571 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2572 'title': '3Blue1Brown - Playlists',
2573 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 2574 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2575 'uploader': '3Blue1Brown',
201c1459 2576 },
8bdd16b4 2577 }, {
da692b79 2578 'note': 'playlists, singlepage',
8bdd16b4 2579 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2580 'playlist_mincount': 4,
2581 'info_dict': {
2582 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2583 'title': 'ThirstForScience - Playlists',
2584 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2585 'uploader': 'ThirstForScience',
2586 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2587 }
2588 }, {
2589 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2590 'only_matching': True,
2591 }, {
da692b79 2592 'note': 'basic, single video playlist',
0e30a7b9 2593 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2594 'info_dict': {
0e30a7b9 2595 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2596 'uploader': 'Sergey M.',
2597 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2598 'title': 'youtube-dl public playlist',
81127aa5 2599 },
0e30a7b9 2600 'playlist_count': 1,
9291475f 2601 }, {
da692b79 2602 'note': 'empty playlist',
0e30a7b9 2603 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2604 'info_dict': {
0e30a7b9 2605 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2606 'uploader': 'Sergey M.',
2607 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2608 'title': 'youtube-dl empty playlist',
9291475f
PH
2609 },
2610 'playlist_count': 0,
2611 }, {
da692b79 2612 'note': 'Home tab',
8bdd16b4 2613 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2614 'info_dict': {
8bdd16b4 2615 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2616 'title': 'lex will - Home',
2617 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2618 'uploader': 'lex will',
2619 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2620 },
8bdd16b4 2621 'playlist_mincount': 2,
9291475f 2622 }, {
da692b79 2623 'note': 'Videos tab',
8bdd16b4 2624 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2625 'info_dict': {
8bdd16b4 2626 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2627 'title': 'lex will - Videos',
2628 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2629 'uploader': 'lex will',
2630 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2631 },
8bdd16b4 2632 'playlist_mincount': 975,
9291475f 2633 }, {
da692b79 2634 'note': 'Videos tab, sorted by popular',
8bdd16b4 2635 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2636 'info_dict': {
8bdd16b4 2637 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2638 'title': 'lex will - Videos',
2639 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2640 'uploader': 'lex will',
2641 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2642 },
8bdd16b4 2643 'playlist_mincount': 199,
9291475f 2644 }, {
da692b79 2645 'note': 'Playlists tab',
8bdd16b4 2646 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2647 'info_dict': {
8bdd16b4 2648 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2649 'title': 'lex will - Playlists',
2650 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2651 'uploader': 'lex will',
2652 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2653 },
8bdd16b4 2654 'playlist_mincount': 17,
ac7553d0 2655 }, {
da692b79 2656 'note': 'Community tab',
8bdd16b4 2657 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2658 'info_dict': {
8bdd16b4 2659 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2660 'title': 'lex will - Community',
2661 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2662 'uploader': 'lex will',
2663 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2664 },
2665 'playlist_mincount': 18,
87dadd45 2666 }, {
da692b79 2667 'note': 'Channels tab',
8bdd16b4 2668 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2669 'info_dict': {
8bdd16b4 2670 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2671 'title': 'lex will - Channels',
2672 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2673 'uploader': 'lex will',
2674 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2675 },
deaec5af 2676 'playlist_mincount': 12,
cd684175 2677 }, {
2678 'note': 'Search tab',
2679 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
2680 'playlist_mincount': 40,
2681 'info_dict': {
2682 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2683 'title': '3Blue1Brown - Search - linear algebra',
2684 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2685 'uploader': '3Blue1Brown',
2686 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2687 },
6b08cdf6 2688 }, {
a0566bbf 2689 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2690 'only_matching': True,
2691 }, {
a0566bbf 2692 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2693 'only_matching': True,
2694 }, {
a0566bbf 2695 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2696 'only_matching': True,
2697 }, {
2698 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2699 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2700 'info_dict': {
2701 'title': '29C3: Not my department',
2702 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2703 'uploader': 'Christiaan008',
2704 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2705 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2706 },
2707 'playlist_count': 96,
2708 }, {
2709 'note': 'Large playlist',
2710 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2711 'info_dict': {
8bdd16b4 2712 'title': 'Uploads from Cauchemar',
2713 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2714 'uploader': 'Cauchemar',
2715 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2716 },
8bdd16b4 2717 'playlist_mincount': 1123,
2718 }, {
da692b79 2719 'note': 'even larger playlist, 8832 videos',
8bdd16b4 2720 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2721 'only_matching': True,
4b7df0d3
JMF
2722 }, {
2723 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2724 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2725 'info_dict': {
acf757f4
PH
2726 'title': 'Uploads from Interstellar Movie',
2727 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2728 'uploader': 'Interstellar Movie',
8bdd16b4 2729 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2730 },
481cc733 2731 'playlist_mincount': 21,
358de58c 2732 }, {
2733 'note': 'Playlist with "show unavailable videos" button',
2734 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2735 'info_dict': {
2736 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2737 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2738 'uploader': 'Phim Siêu Nhân Nhật Bản',
2739 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2740 },
da692b79 2741 'playlist_mincount': 200,
5d342002 2742 }, {
da692b79 2743 'note': 'Playlist with unavailable videos in page 7',
5d342002 2744 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2745 'info_dict': {
2746 'title': 'Uploads from BlankTV',
2747 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2748 'uploader': 'BlankTV',
2749 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2750 },
da692b79 2751 'playlist_mincount': 1000,
8bdd16b4 2752 }, {
da692b79 2753 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 2754 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2755 'info_dict': {
2756 'title': 'Data Analysis with Dr Mike Pound',
2757 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2758 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2759 'uploader': 'Computerphile',
deaec5af 2760 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2761 },
2762 'playlist_mincount': 11,
2763 }, {
a0566bbf 2764 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2765 'only_matching': True,
dacb3a86 2766 }, {
da692b79 2767 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
2768 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2769 'info_dict': {
2770 'id': 'FqZTN594JQw',
2771 'ext': 'webm',
2772 'title': "Smiley's People 01 detective, Adventure Series, Action",
2773 'uploader': 'STREEM',
2774 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2775 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2776 'upload_date': '20150526',
2777 'license': 'Standard YouTube License',
2778 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2779 'categories': ['People & Blogs'],
2780 'tags': list,
dbdaaa23 2781 'view_count': int,
dacb3a86
S
2782 'like_count': int,
2783 'dislike_count': int,
2784 },
2785 'params': {
2786 'skip_download': True,
2787 },
13a75688 2788 'skip': 'This video is not available.',
dacb3a86 2789 'add_ie': [YoutubeIE.ie_key()],
481cc733 2790 }, {
8bdd16b4 2791 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2792 'only_matching': True,
66b48727 2793 }, {
8bdd16b4 2794 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2795 'only_matching': True,
a0566bbf 2796 }, {
2797 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2798 'info_dict': {
da692b79 2799 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 2800 'ext': 'mp4',
deaec5af 2801 'title': compat_str,
a0566bbf 2802 'uploader': 'Sky News',
2803 'uploader_id': 'skynews',
2804 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 2805 'upload_date': r're:\d{8}',
2806 'description': compat_str,
a0566bbf 2807 'categories': ['News & Politics'],
2808 'tags': list,
2809 'like_count': int,
2810 'dislike_count': int,
2811 },
2812 'params': {
2813 'skip_download': True,
2814 },
da692b79 2815 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 2816 }, {
2817 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2818 'info_dict': {
2819 'id': 'a48o2S1cPoo',
2820 'ext': 'mp4',
2821 'title': 'The Young Turks - Live Main Show',
2822 'uploader': 'The Young Turks',
2823 'uploader_id': 'TheYoungTurks',
2824 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2825 'upload_date': '20150715',
2826 'license': 'Standard YouTube License',
2827 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2828 'categories': ['News & Politics'],
2829 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2830 'like_count': int,
2831 'dislike_count': int,
2832 },
2833 'params': {
2834 'skip_download': True,
2835 },
2836 'only_matching': True,
2837 }, {
2838 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2839 'only_matching': True,
2840 }, {
2841 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2842 'only_matching': True,
3d3dddc9 2843 }, {
2844 'url': 'https://www.youtube.com/feed/trending',
2845 'only_matching': True,
2846 }, {
3d3dddc9 2847 'url': 'https://www.youtube.com/feed/library',
2848 'only_matching': True,
2849 }, {
3d3dddc9 2850 'url': 'https://www.youtube.com/feed/history',
2851 'only_matching': True,
2852 }, {
3d3dddc9 2853 'url': 'https://www.youtube.com/feed/subscriptions',
2854 'only_matching': True,
2855 }, {
3d3dddc9 2856 'url': 'https://www.youtube.com/feed/watch_later',
2857 'only_matching': True,
2858 }, {
da692b79 2859 'note': 'Recommended - redirects to home page',
3d3dddc9 2860 'url': 'https://www.youtube.com/feed/recommended',
2861 'only_matching': True,
29f7c58a 2862 }, {
da692b79 2863 'note': 'inline playlist with not always working continuations',
29f7c58a 2864 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2865 'only_matching': True,
2866 }, {
2867 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2868 'only_matching': True,
2869 }, {
2870 'url': 'https://www.youtube.com/course',
2871 'only_matching': True,
2872 }, {
2873 'url': 'https://www.youtube.com/zsecurity',
2874 'only_matching': True,
2875 }, {
2876 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2877 'only_matching': True,
2878 }, {
2879 'url': 'https://www.youtube.com/TheYoungTurks/live',
2880 'only_matching': True,
39ed931e 2881 }, {
2882 'url': 'https://www.youtube.com/hashtag/cctv9',
2883 'info_dict': {
2884 'id': 'cctv9',
2885 'title': '#cctv9',
2886 },
2887 'playlist_mincount': 350,
201c1459 2888 }, {
2889 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2890 'only_matching': True,
9297939e 2891 }, {
da692b79 2892 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 2893 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2894 'only_matching': True
fe03a6cd 2895 }, {
2896 'note': '/browse/ should redirect to /channel/',
2897 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
2898 'only_matching': True
2899 }, {
2900 'note': 'VLPL, should redirect to playlist?list=PL...',
2901 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2902 'info_dict': {
2903 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2904 'uploader': 'NoCopyrightSounds',
2905 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
2906 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
2907 'title': 'NCS Releases',
2908 },
2909 'playlist_mincount': 166,
18db7548 2910 }, {
2911 'note': 'Topic, should redirect to playlist?list=UU...',
2912 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
2913 'info_dict': {
2914 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
2915 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
2916 'title': 'Uploads from Royalty Free Music - Topic',
2917 'uploader': 'Royalty Free Music - Topic',
2918 },
2919 'expected_warnings': [
2920 'A channel/user page was given',
2921 'The URL does not have a videos tab',
2922 ],
2923 'playlist_mincount': 101,
2924 }, {
2925 'note': 'Topic without a UU playlist',
2926 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
2927 'info_dict': {
2928 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
2929 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
2930 },
2931 'expected_warnings': [
2932 'A channel/user page was given',
2933 'The URL does not have a videos tab',
2934 'Falling back to channel URL',
2935 ],
2936 'playlist_mincount': 9,
abcdd12b 2937 }, {
2938 'note': 'Youtube music Album',
2939 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
2940 'info_dict': {
2941 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
2942 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
2943 },
2944 'playlist_count': 50,
29f7c58a 2945 }]
2946
@classmethod
def suitable(cls, url):
    """Report whether this extractor should handle *url*.

    Any URL that YoutubeIE accepts is rejected here; everything else is
    decided by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2951
2952 def _extract_channel_id(self, webpage):
2953 channel_id = self._html_search_meta(
2954 'channelId', webpage, 'channel id', default=None)
2955 if channel_id:
2956 return channel_id
2957 channel_url = self._html_search_meta(
2958 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2959 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2960 'twitter:app:url:googleplay'), webpage, 'channel url')
2961 return self._search_regex(
2962 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2963 channel_url, 'channel id')
15f6397c 2964
8bdd16b4 2965 @staticmethod
cd7c66cf 2966 def _extract_basic_item_renderer(item):
2967 # Modified from _extract_grid_item_renderer
201c1459 2968 known_basic_renderers = (
2969 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 2970 )
2971 for key, renderer in item.items():
201c1459 2972 if not isinstance(renderer, dict):
cd7c66cf 2973 continue
201c1459 2974 elif key in known_basic_renderers:
2975 return renderer
2976 elif key.startswith('grid') and key.endswith('Renderer'):
2977 return renderer
8bdd16b4 2978
def _grid_entries(self, grid_renderer):
    """Yield one entry per recognised item in ``grid_renderer['items']``.

    Each item is reduced to its basic renderer and then classified, in
    order, as a playlist (``playlistId``), a video (``videoId``), a
    channel (``channelId``) or, failing those, by the URL of its
    navigation endpoint. Items that match none of these are skipped.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = try_get(
            renderer, (lambda x: x['title']['runs'][0]['text'],
                       lambda x: x['title']['simpleText']), compat_str)
        # playlist
        playlist_id = renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
            continue
        # video
        video_id = renderer.get('videoId')
        if video_id:
            yield self._extract_video(renderer)
            continue
        # channel
        channel_id = renderer.get('channelId')
        if channel_id:
            # Prefer simpleText as before, but keep the title extracted
            # above instead of dropping to None when only `runs` exists.
            channel_title = try_get(
                renderer, lambda x: x['title']['simpleText'], compat_str) or title
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=channel_title)
            continue
        # generic endpoint URL support
        ep_url = urljoin('https://www.youtube.com/', try_get(
            renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str))
        if ep_url:
            # The first extractor that accepts the URL wins.
            for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if ie.suitable(ep_url):
                    yield self.url_result(
                        ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                    break
8bdd16b4 3021
3d3dddc9 3022 def _shelf_entries_from_content(self, shelf_renderer):
3023 content = shelf_renderer.get('content')
3024 if not isinstance(content, dict):
8bdd16b4 3025 return
cd7c66cf 3026 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3027 if renderer:
3028 # TODO: add support for nested playlists so each shelf is processed
3029 # as separate playlist
3030 # TODO: this includes only first N items
3031 for entry in self._grid_entries(renderer):
3032 yield entry
3033 renderer = content.get('horizontalListRenderer')
3034 if renderer:
3035 # TODO
3036 pass
8bdd16b4 3037
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf: its own URL, then its inline content.

    When the shelf endpoint resolves to a URL, a url_result for it is
    yielded first. With *skip_channels* set, a shelf whose URL contains
    ``/channels?`` produces nothing at all. Entries extracted from the
    shelf content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType ==
        # WEB_PAGE_TYPE_CHANNEL will not work.
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may carry no URL, so also extract from its content.
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 3055
8bdd16b4 3056 def _playlist_entries(self, video_list_renderer):
3057 for content in video_list_renderer['contents']:
3058 if not isinstance(content, dict):
3059 continue
3060 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3061 if not isinstance(renderer, dict):
3062 continue
3063 video_id = renderer.get('videoId')
3064 if not video_id:
3065 continue
3066 yield self._extract_video(renderer)
07aeced6 3067
def _rich_entries(self, rich_grid_renderer):
    """Yield the video found at ``content.videoRenderer``, if it has an ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3075
8bdd16b4 3076 def _video_entry(self, video_renderer):
3077 video_id = video_renderer.get('videoId')
3078 if video_id:
3079 return self._extract_video(video_renderer)
dacb3a86 3080
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a single backstage post.

    In order: the attached video, the attached playlist, and every
    YouTube video linked from the post text. A linked video whose ID
    equals the attached video's ID is not yielded again.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # Video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # Playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # Video links inside the post text
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        if linked_video_id != attached_video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 3116
8bdd16b4 3117 def _post_thread_continuation_entries(self, post_thread_continuation):
3118 contents = post_thread_continuation.get('contents')
3119 if not isinstance(contents, list):
3120 return
3121 for content in contents:
3122 renderer = content.get('backstagePostThreadRenderer')
3123 if not isinstance(renderer, dict):
3124 continue
3125 for entry in self._post_thread_entries(renderer):
3126 yield entry
07aeced6 3127
39ed931e 3128 r''' # unused
3129 def _rich_grid_entries(self, contents):
3130 for content in contents:
3131 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3132 if video_renderer:
3133 entry = self._video_entry(video_renderer)
3134 if entry:
3135 yield entry
3136 '''
3137
29f7c58a 3138 @staticmethod
3139 def _build_continuation_query(continuation, ctp=None):
3140 query = {
3141 'ctoken': continuation,
3142 'continuation': continuation,
3143 }
3144 if ctp:
3145 query['itct'] = ctp
3146 return query
3147
@staticmethod
def _extract_next_continuation_data(renderer):
    """Build a continuation query from renderer['continuations'][0].

    Reads the 'nextContinuationData' dict of the first continuation and
    returns None when it, or its 'continuation' token, is absent.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = data.get('continuation') if data else None
    if not token:
        return
    return YoutubeTabIE._build_continuation_query(
        token, data.get('clickTrackingParams'))
c5e8d7af 3159
@classmethod
def _extract_continuation(cls, renderer):
    """Find the continuation query for a renderer, if it has one.

    'nextContinuationData' is tried first. Otherwise the renderer's
    'contents' and 'items' lists are scanned, in that order, for the first
    'continuationItemRenderer' that carries a continuation command token.
    Returns None when nothing is found.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
448830ce 3182
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of a tab, following continuations page by page.

    @param tab             selected tab renderer (its 'content' is read)
    @param item_id         id used to label the continuation requests
    @param identity_token  passed through to _generate_api_headers
    @param account_syncid  passed through to _generate_api_headers
    @param ytcfg           passed to _extract_context, _generate_api_headers
                           and _extract_response
    """

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        # Dispatch table for the renderers found inside an itemSectionRenderer
        known_renderers = {
            'playlistVideoListRenderer': self._playlist_entries,
            'gridRenderer': self._grid_entries,
            'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
            'backstagePostThreadRenderer': self._post_thread_entries,
            'videoRenderer': lambda x: [self._video_entry(x)],
        }
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Only the first known renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list written by extract_entries to hand back the continuation
    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    # Handlers for responses that carry 'continuationContents'
    known_continuation_renderers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for responses that carry 'appendContinuationItemsAction';
    # the second element is the key under which the handler expects the items
    known_renderers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # 'itct' is only present when the continuation had click tracking
        # params, so it must not be subscripted unconditionally
        ctp = continuation.get('itct')
        if ctp:
            query['clickTracking'] = {'clickTrackingParams': ctp}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3302
@staticmethod
def _extract_selected_tab(tabs):
    """Return the first tab renderer whose 'selected' flag is exactly True.

    Each tab is looked up under 'tabRenderer' or 'expandableTabRenderer'.
    Raises ExtractorError when no tab is selected.
    """
    renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    for candidate in renderers:
        if candidate.get('selected') is True:
            return candidate
    raise ExtractorError('Unable to find selected tab')
b82f815f 3311
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar.

    Reads the video owner from each 'playlistSidebarSecondaryInfoRenderer'
    item and returns a dict with whichever of 'uploader', 'uploader_id' and
    'uploader_url' are not None. A later owner overwrites an earlier one.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary, dict):
            continue
        owner = try_get(
            secondary, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        channel_path = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        info.update({
            'uploader': owner.get('text'),
            'uploader_id': try_get(
                owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
            'uploader_url': urljoin('https://www.youtube.com/', channel_path),
        })
    return dict((key, value) for key, value in info.items() if value is not None)
8bdd16b4 3334
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tab-based page.

    Metadata is read from 'channelMetadataRenderer' when present, otherwise
    from 'playlistMetadataRenderer'. Uploader fields fall back to the
    sidebar when no channel id is known. Entries come from the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    meta_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta_renderer:
        channel_name = meta_renderer.get('title')
        channel_url = meta_renderer.get('channelUrl')
        channel_id = meta_renderer.get('externalId')
    else:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []
    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        playlist_id = channel_id
        tags = meta_renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(meta_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        # Hashtag pages carry their title in the header instead
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the final uploader_* fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id,
        self._extract_identity_token(webpage, item_id),
        self._extract_account_syncid(data),
        self._extract_ytcfg(item_id, webpage))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3407
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages as needed.

    Iteration stops when a page has no videos, has nothing after the last
    video already seen, when the first video shows up again, or when the
    'next' response contains no playlist.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        # The response of the previous page may not contain a playlist
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # Fall back to an empty dict so the .get() calls below cannot fail
        # when the last item has no watch endpoint
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 3446
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Return the result for a playlist shown alongside a watch page.

    When the playlist advertises a URL of its own that differs from `url`,
    extraction is delegated to that URL. Otherwise the playlist is
    extracted as a mix.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    if playlist_url and playlist_url != url:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=title)

    mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
    return self.playlist_result(
        mix_entries, playlist_id=playlist_id, playlist_title=title)
c5e8d7af 3464
@staticmethod
def _extract_alerts(data):
    """Yield one (alert_type, message) pair per alert found in data['alerts'].

    The message is the alert's 'simpleText' followed by the text of each of
    its 'runs'. Alerts with no type or with an empty message are skipped, as
    are entries that are not dicts.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
            for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                # A run without text contributes nothing instead of raising
                message += try_get(run, lambda x: x['text'], compat_str) or ''
            # Yield exactly once per alert, so the same message is never
            # reported twice
            if message:
                yield alert_type, message
3481
3482 def _report_alerts(self, alerts, expected=True):
3ffc7c89 3483 errors = []
3484 warnings = []
95c01b6c 3485 for alert_type, alert_message in alerts:
f3eaa8dd 3486 if alert_type.lower() == 'error':
3ffc7c89 3487 errors.append([alert_type, alert_message])
f3eaa8dd 3488 else:
3ffc7c89 3489 warnings.append([alert_type, alert_message])
f3eaa8dd 3490
3ffc7c89 3491 for alert_type, alert_message in (warnings + errors[:-1]):
6a39ee13 3492 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
3ffc7c89 3493 if errors:
3494 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3495
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and pass them to _report_alerts.

    Extra positional and keyword arguments are forwarded to _report_alerts.
    """
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
3498
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when the page has no playlist sidebar. When the button is
    not found, default 'params' and a 'VL'-prefixed browse id are used.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return

    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        primary_info = sidebar_item.get('playlistSidebarPrimaryInfoRenderer')
        menu_items = try_get(
            primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if not isinstance(menu_item, dict):
                continue
            nav_item = menu_item.get('menuNavigationItemRenderer')
            label = try_get(
                nav_item, lambda x: x['text']['simpleText'], compat_str)
            if label and label.lower() == 'show unavailable videos':
                endpoint = try_get(
                    nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
                browse_id = endpoint.get('browseId')
                params = endpoint.get('params')
                # Only the menu of this sidebar item is abandoned; the
                # remaining sidebar items are still inspected
                break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    account_syncid = self._extract_account_syncid(ytcfg)
    identity_token = self._extract_identity_token(webpage, item_id=item_id)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=account_syncid,
        identity_token=identity_token,
        visitor_data=visitor_data)
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3542
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint `ep`, retrying on transient failures.

    A request is retried (up to the 'extractor_retries' param, default 3)
    on HTTP 500/503/404 and when none of `check_get_keys` is present in the
    response. Once retries are exhausted, or on any other ExtractorError,
    the error is raised if `fatal`, otherwise a warning is reported and
    None is returned.
    """
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    response = last_error = None
    attempt = -1
    while attempt < retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, ' (retry #%d)' % attempt if attempt else ''))
        except ExtractorError as err:
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            if isinstance(err.cause, compat_HTTPError) and err.cause.code in (500, 503, 404):
                last_error = 'HTTP Error %s' % err.cause.code
                if attempt < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(err))
            return

        # Reached only when the call succeeded; errors raised from here on
        # are deliberately not handled by the except clause above.
        # Youtube may send alerts if there was an issue with the continuation page
        self._extract_and_report_alerts(response, expected=False)
        if not check_get_keys or dict_get(response, check_get_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if attempt < retries:
            continue
        if fatal:
            raise ExtractorError(last_error)
        self.report_warning(last_error)
        return
    return response
3590
def _extract_webpage(self, url, item_id):
    """Download `url` until its initial data is usable.

    The data is usable when it has 'contents' or 'currentVideoEndpoint'.
    Returns (webpage, data). Alerts are reported after each unusable
    download, and ExtractorError is raised once the 'extractor_retries'
    param (default 3) is exhausted.
    """
    max_retries = self.get_param('extractor_retries', 3)
    failure_message = 'Incomplete yt initial data recieved'
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        retry_note = ''
        if attempt:
            self.report_warning('%s. Retrying ...' % failure_message)
            retry_note = ' (retry #%d)' % attempt
        webpage = self._download_webpage(
            url, item_id, 'Downloading webpage%s' % retry_note)
        data = self._extract_yt_initial_data(item_id, webpage)
        if any(data.get(key) for key in ('contents', 'currentVideoEndpoint')):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if attempt >= max_retries:
            raise ExtractorError(failure_message)
    return webpage, data
3612
9297939e 3613 @staticmethod
3614 def _smuggle_data(entries, data):
3615 for entry in entries:
3616 if data:
3617 entry['url'] = smuggle_url(entry['url'], data)
3618 yield entry
3619
def _real_extract(self, url):
    """Extract `url`, carrying its smuggled data over to the entries.

    A music URL is flagged in the smuggled data before extraction. When the
    result has entries, they are wrapped so that every entry URL carries
    the same smuggled data.
    """
    url, smuggled = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled.update(is_music_url=True)
    result = self.__real_extract(url, smuggled)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled)
    return result
3628
# Splits a URL into the part matched by _VALID_URL ('pre'), an optional
# '/tab' segment (only attempted when the 'channel_type' group matched)
# and whatever follows ('post')
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)

def __real_extract(self, url, smuggled_data):
    """Normalize a tab, playlist or watch URL and extract it.

    @param url            URL to extract (already unsmuggled)
    @param smuggled_data  dict; 'is_music_url' is read from it
    Returns a playlist result, or a url_result when only a single video
    can or should be downloaded. Raises ExtractorError when the page
    cannot be recognized.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Unmatched groups become '' so that they can be used as strings
        groups = self._url_re.match(url).groupdict()
        return dict((k, '' if v is None else v) for k, v in groups.items())

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if (mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]
                and 'no-youtube-channel-redirect' not in compat_opts):
            if not mobj['not_channel'] and item_id[:2] == 'UC':
                # Topic channels don't have /videos. Use the equivalent playlist instead
                self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                pl_id = 'UU%s' % item_id[2:]
                pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                try:
                    pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                    for alert_type, alert_message in self._extract_alerts(pl_data):
                        # Compare case-insensitively, the same way
                        # _report_alerts classifies error alerts
                        if alert_type.lower() == 'error':
                            raise ExtractorError('Youtube said: %s' % alert_message)
                    item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                except ExtractorError:
                    self.report_warning('The playlist gave error. Falling back to channel URL')
            else:
                self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    # Re-read the tabs: `data` may have been replaced above
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3742
c5e8d7af 3743
class YoutubePlaylistIE(InfoExtractor):
    """Match playlist URLs and bare playlist ids and hand them to YoutubeTabIE."""

    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a video id ('v')."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the URL as a /playlist URL and delegate it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # NOTE(review): is_music_url is not defined in this class - confirm it is
        # available through InfoExtractor
        is_music_url = self.is_music_url(url)
        # Keep the original query string; fall back to the bare list id
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 3826
3827
class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be links that carry a playlist id into watch URLs for YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL (video id + list id) and delegate it."""
        match = re.match(self._VALID_URL, url)
        video_id, playlist_id = match.group('id'), match.group('playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3866
3867
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the "ytuser:<name>" keyword to the user's page on YouTube."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/<name> URL."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3881
b05654f0 3882
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ":ytfav" keyword (and its spelling variants) to the 'LL' playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the matched keyword itself carries no data."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
3900
3901
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Keyword search extractor (``ytsearch``) that pages through results."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    # Optional value sent as 'params' with each request; subclasses override it.
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield video entries for ``query`` until ``n`` have been produced.

        Pages are requested one after another; each page's continuation
        token (when present) is fed into the next request. Iteration ends
        when a response is empty, has no recognisable result list, or
        provides no continuation token.
        """
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS

        # The result list is looked up under either of these two paths.
        section_getters = (
            lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
            lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems'],
        )

        yielded = 0
        for page_num in itertools.count(1):
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num),
                ep='search',
                query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands'))
            if not response:
                return

            sections = try_get(response, section_getters, list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    # Skip anything that is not a video renderer with an id.
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            request['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Return a playlist result wrapping up to ``n`` entries for ``query``."""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3971
c9ae7b95 3972
class YoutubeSearchDateIE(YoutubeSearchIE):
    """``ytsearchdate`` variant of the search extractor with a fixed ``params`` value."""

    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' field of every search request.
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3978
c9ae7b95 3979
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search described by a ``/results?search_query=...`` page URL."""

    IE_NAME = '%s_url' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own ``_VALID_URL`` unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the search terms and ``sp`` filter from the URL and run the search.

        The first ``search_query`` value is used, falling back to ``q``.
        The first ``sp`` value (or an empty string) is stored on the
        instance as ``_SEARCH_PARAMS`` before the search is run.
        """
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        sp_values = params.get('sp', ('',))
        self._SEARCH_PARAMS = sp_values[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4005
4006
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed shortcut extractors.
    Each subclass has to provide a _FEED_NAME attribute; it is used both
    for the extractor name and for the feed URL.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's ``_FEED_NAME``."""
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_initialize(self):
        """Log in during initialization."""
        self._login()

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``/feed/<_FEED_NAME>`` URL."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4026
4027
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` shortcut onto the ``WL`` playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the input URL itself carries no data."""
        watch_later_playlist = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_playlist, ie=YoutubeTabIE.ie_key())
3462ffa8 4040
4041
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``recommended`` feed."""

    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Matches the bare youtube.com home page as well as the :ytrec shortcuts.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4056
1ed5b5c9 4057
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Accepts :ytsub, :ytsubs, :ytsubscription and :ytsubscriptions.
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4069
1ed5b5c9 4070
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    # Accepts both :ythis and :ythistory.
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4079
4080
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that carry no video id and explain why.

    Such URLs typically result from an unquoted ``&`` on the command line;
    extraction always fails with an expected, user-facing error.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches only when the URL ends right after one of the listed
    # non-identifying parameters (or after a bare "watch?").
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a quoting hint.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        # The example commands name this project's executable (yt-dlp).
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4128
4129
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is only 1-10 characters long."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id and the URL."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)