]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[audius:artist] Add extractor (#323)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Return the parsed query string of *url*.

    The query component is isolated with compat_urlparse.urlparse() and then
    handed to compat_urlparse.parse_qs(), whose result is returned unchanged.
    """
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account sign-in endpoints
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the TL token taken from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of reserved path names (not referenced inside this class)
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not None) so the result matches the docstring
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the login form plus the JSON-encoded f_req to url; non-fatal on failure."""
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything before the first '[' so the remainder parses as JSON
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self.report_warning(message)

        # The same continue URL is embedded in both the lookup and the challenge request
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not None) so the result matches the docstring
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesized: '%' binds tighter than the conditional expression,
            # so without the parentheses the prefix is lost for other messages
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesized for the same precedence reason as the login message
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _initialize_consent(self):
        """Set a 'YES' CONSENT cookie for .youtube.com unless one is already in place.

        Nothing is done when a __Secure-3PSID cookie exists or the current
        CONSENT cookie already contains 'YES'.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            # Reuse the numeric id of a pending consent cookie when there is one
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Prepare the consent cookie and, when a downloader is attached, try to log in."""
        self._initialize_consent()
        if self._downloader is None:
            return
        # A failed login is not fatal here; _login() has already issued a warning
        if not self._login():
            return

    # Fallback values used when a page's ytcfg does not provide them
    _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
    _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """Build the 'SAPISIDHASH <time>_<sha1>' value from the SAPISID cookie.

        Returns None when no SAPISID cookie is set for https://www.youtube.com.
        """
        sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
        if sapisid_cookie is None:
            return
        # int() keeps the timestamp integral where round() returns a float (Python 2)
        time_now = int(round(time.time()))
        sapisidhash = hashlib.sha1(
            ('%s %s %s' % (time_now, sapisid_cookie.value, 'https://www.youtube.com')).encode('utf-8')).hexdigest()
        return 'SAPISIDHASH %s_%s' % (time_now, sapisidhash)

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None):
        """POST a JSON request to the youtubei/v1 endpoint *ep* and return the parsed JSON.

        @param query    dict merged into the request body next to 'context'
        @param headers  optional headers overriding the generated API headers
        @param context  request context; _extract_context() is used when falsy
        @param api_key  API key; _extract_api_key() is used when falsy
        """
        data = {'context': context or self._extract_context()}
        data.update(query)
        real_headers = self._generate_api_headers()
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def _extract_api_key(self, ytcfg=None):
        """Return INNERTUBE_API_KEY from ytcfg, falling back to _YT_INNERTUBE_API_KEY."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY

    def _extract_yt_initial_data(self, video_id, webpage):
        """Locate the ytInitialData object in webpage and return it parsed as JSON."""
        return self._parse_json(
            self._search_regex(
                # Prefer the match that is followed by a known boundary
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return ID_TOKEN from the page's ytcfg, else from a regex over webpage, else None."""
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(data):
        """
        Extract syncId required to download private playlists of secondary channels
        @param data Either response or ytcfg
        """
        sync_ids = (try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]
        # ytcfg includes channel_syncid if on secondary channel
        # Guarded so that a missing (non-dict) data yields None instead of raising
        if isinstance(data, dict):
            return data.get('DELEGATED_SESSION_ID')
        return None

    def _extract_ytcfg(self, video_id, webpage):
        """Return the dict passed to ytcfg.set(...) in webpage, or {} when unavailable."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def __extract_client_version(self, ytcfg):
        """Return INNERTUBE_CLIENT_VERSION from ytcfg, falling back to _YT_WEB_CLIENT_VERSION."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION

    def _extract_context(self, ytcfg=None):
        """Return INNERTUBE_CONTEXT from ytcfg, or rebuild a client context when it is absent."""
        context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
        if context:
            return context

        # Recreate the client context (required)
        client_version = self.__extract_client_version(ytcfg)
        client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
        context = {
            'client': {
                'clientName': client_name,
                'clientVersion': client_version,
            }
        }
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
        """Build the HTTP headers for API requests.

        The optional identity token, account sync id and visitor data are only
        added when given; the Authorization and X-Origin headers are only added
        when a SAPISIDHASH value can be generated.
        """
        headers = {
            'X-YouTube-Client-Name': '1',
            'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
            headers['X-Goog-AuthUser'] = 0
        if visitor_data:
            headers['x-goog-visitor-id'] = visitor_data
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = 'https://www.youtube.com'
        return headers

    @staticmethod
    def is_music_url(url):
        """Return True if url starts with http(s)://music.youtube.com/"""
        return re.match(r'https?://music\.youtube\.com/', url) is not None

    def _extract_video(self, renderer):
        """Convert a video renderer dict into a 'url' result handled by YoutubeIE."""
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is stripped first; only a leading run of digits/commas is used
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
436
0c148415 437
360e1ca5 438class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 439 IE_DESC = 'YouTube.com'
bc2ca1bb 440 _INVIDIOUS_SITES = (
441 # invidious-redirect websites
442 r'(?:www\.)?redirect\.invidious\.io',
443 r'(?:(?:www|dev)\.)?invidio\.us',
444 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
445 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 446 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 447 r'(?:(?:www|au)\.)?ytprivate\.com',
448 r'(?:www\.)?invidious\.namazso\.eu',
449 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 450 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
451 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
452 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
453 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
454 # youtube-dl invidious instances list
455 r'(?:(?:www|no)\.)?invidiou\.sh',
456 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
457 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 458 r'(?:www\.)?invidious\.mastodon\.host',
459 r'(?:www\.)?invidious\.zapashcanon\.fr',
460 r'(?:www\.)?invidious\.kavin\.rocks',
201c1459 461 r'(?:www\.)?invidious\.tinfoil-hat\.net',
462 r'(?:www\.)?invidious\.himiko\.cloud',
463 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 464 r'(?:www\.)?invidious\.tube',
465 r'(?:www\.)?invidiou\.site',
466 r'(?:www\.)?invidious\.site',
467 r'(?:www\.)?invidious\.xyz',
468 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 469 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 470 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 471 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 472 r'(?:www\.)?tube\.poal\.co',
473 r'(?:www\.)?tube\.connect\.cafe',
474 r'(?:www\.)?vid\.wxzm\.sx',
475 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 476 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 477 r'(?:www\.)?yewtu\.be',
478 r'(?:www\.)?yt\.elukerio\.org',
479 r'(?:www\.)?yt\.lelux\.fi',
480 r'(?:www\.)?invidious\.ggc-project\.de',
481 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 482 r'(?:www\.)?ytprivate\.com',
483 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 484 r'(?:www\.)?invidious\.toot\.koeln',
485 r'(?:www\.)?invidious\.fdn\.fr',
486 r'(?:www\.)?watch\.nettohikari\.com',
487 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
488 r'(?:www\.)?qklhadlycap4cnod\.onion',
489 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
490 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
491 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
492 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
493 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
494 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
495 )
cb7dfeea 496 _VALID_URL = r"""(?x)^
c5e8d7af 497 (
edb53e2d 498 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 499 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
500 (?:www\.)?deturl\.com/www\.youtube\.com|
501 (?:www\.)?pwnyoutube\.com|
502 (?:www\.)?hooktube\.com|
503 (?:www\.)?yourepeat\.com|
504 tube\.majestyc\.net|
505 %(invidious)s|
506 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
507 (?:.*?\#/)? # handle anchor (#/) redirect urls
508 (?: # the various things that can precede the ID:
ac7553d0 509 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 510 |(?: # or the v= param in all its forms
f7000f3a 511 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 512 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 513 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
514 v=
515 )
f4b05232 516 ))
cbaed4bb
S
517 |(?:
518 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
519 vid\.plus| # or vid.plus/xxxx
520 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 521 %(invidious)s
cbaed4bb 522 )/
edb53e2d 523 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 524 )
c5e8d7af 525 )? # all until now is optional -> you can pass the naked ID
201c1459 526 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 527 (?(1).+)? # if we found the ID, everything can follow
9297939e 528 (?:\#|$)""" % {
bc2ca1bb 529 'invidious': '|'.join(_INVIDIOUS_SITES),
530 }
e40c758c 531 _PLAYER_INFO_RE = (
cc2db878 532 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
533 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 534 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 535 )
2c62dc26 536 _formats = {
c2d3cb4c 537 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
538 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
539 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
540 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
541 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
542 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
543 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
544 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 545 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 546 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
547 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
548 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
549 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
550 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
551 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 552 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 553 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
554 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 555
556
557 # 3D videos
c2d3cb4c 558 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
559 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
560 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
561 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 562 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
563 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
564 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 565
96fb5605 566 # Apple HTTP Live Streaming
11f12195 567 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 568 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
569 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
570 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
571 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
572 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 573 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
574 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
575
576 # DASH mp4 video
d23028a8
S
577 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
578 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
579 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
580 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
581 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 582 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
583 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
584 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
585 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
586 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
587 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
588 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 589
f6f1fc92 590 # Dash mp4 audio
d23028a8
S
591 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
592 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
593 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
594 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
595 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
596 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
597 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
598
599 # Dash webm
d23028a8
S
600 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
601 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
602 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
603 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
604 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
605 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
606 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
607 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
608 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
609 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
610 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
611 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
612 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
613 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
614 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 615 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
616 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
617 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
618 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
619 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
620 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
621 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
622
623 # Dash webm audio
d23028a8
S
624 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
625 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 626
0857baad 627 # Dash webm audio with opus inside
d23028a8
S
628 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
629 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
630 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 631
ce6b9a2d
PH
632 # RTMP (unnamed)
633 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
634
635 # av01 video only formats sometimes served with "unknown" codecs
636 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
637 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
638 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
639 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 640 }
29f7c58a 641 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 642
fd5c4aab
S
643 _GEO_BYPASS = False
644
78caa52a 645 IE_NAME = 'youtube'
2eb88d95
PH
646 _TESTS = [
647 {
2d3d2997 648 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
649 'info_dict': {
650 'id': 'BaW_jenozKc',
651 'ext': 'mp4',
3867038a 652 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
653 'uploader': 'Philipp Hagemeister',
654 'uploader_id': 'phihag',
ec85ded8 655 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
656 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
657 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 658 'upload_date': '20121002',
3867038a 659 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 660 'categories': ['Science & Technology'],
3867038a 661 'tags': ['youtube-dl'],
556dbe7f 662 'duration': 10,
dbdaaa23 663 'view_count': int,
3e7c1224
PH
664 'like_count': int,
665 'dislike_count': int,
7c80519c 666 'start_time': 1,
297a564b 667 'end_time': 9,
2eb88d95 668 }
0e853ca4 669 },
fccd3771 670 {
4bc3a23e
PH
671 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
672 'note': 'Embed-only video (#1746)',
673 'info_dict': {
674 'id': 'yZIXLfi8CZQ',
675 'ext': 'mp4',
676 'upload_date': '20120608',
677 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
678 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
679 'uploader': 'SET India',
94bfcd23 680 'uploader_id': 'setindia',
ec85ded8 681 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 682 'age_limit': 18,
545cc85d 683 },
684 'skip': 'Private video',
fccd3771 685 },
11b56058 686 {
8bdd16b4 687 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
688 'note': 'Use the first video ID in the URL',
689 'info_dict': {
690 'id': 'BaW_jenozKc',
691 'ext': 'mp4',
3867038a 692 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
693 'uploader': 'Philipp Hagemeister',
694 'uploader_id': 'phihag',
ec85ded8 695 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 696 'upload_date': '20121002',
3867038a 697 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 698 'categories': ['Science & Technology'],
3867038a 699 'tags': ['youtube-dl'],
556dbe7f 700 'duration': 10,
dbdaaa23 701 'view_count': int,
11b56058
PM
702 'like_count': int,
703 'dislike_count': int,
34a7de29
S
704 },
705 'params': {
706 'skip_download': True,
707 },
11b56058 708 },
dd27fd17 709 {
2d3d2997 710 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
711 'note': '256k DASH audio (format 141) via DASH manifest',
712 'info_dict': {
713 'id': 'a9LDPn-MO4I',
714 'ext': 'm4a',
715 'upload_date': '20121002',
716 'uploader_id': '8KVIDEO',
ec85ded8 717 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
718 'description': '',
719 'uploader': '8KVIDEO',
720 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 721 },
4bc3a23e
PH
722 'params': {
723 'youtube_include_dash_manifest': True,
724 'format': '141',
4919603f 725 },
de3c7fe0 726 'skip': 'format 141 not served anymore',
dd27fd17 727 },
8bdd16b4 728 # DASH manifest with encrypted signature
729 {
730 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
731 'info_dict': {
732 'id': 'IB3lcPjvWLA',
733 'ext': 'm4a',
734 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
735 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
736 'duration': 244,
737 'uploader': 'AfrojackVEVO',
738 'uploader_id': 'AfrojackVEVO',
739 'upload_date': '20131011',
cc2db878 740 'abr': 129.495,
8bdd16b4 741 },
742 'params': {
743 'youtube_include_dash_manifest': True,
744 'format': '141/bestaudio[ext=m4a]',
745 },
746 },
aa79ac0c
PH
747 # Controversy video
748 {
749 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
750 'info_dict': {
751 'id': 'T4XJQO3qol8',
752 'ext': 'mp4',
556dbe7f 753 'duration': 219,
aa79ac0c 754 'upload_date': '20100909',
4fe54c12 755 'uploader': 'Amazing Atheist',
aa79ac0c 756 'uploader_id': 'TheAmazingAtheist',
ec85ded8 757 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 758 'title': 'Burning Everyone\'s Koran',
545cc85d 759 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 760 }
c522adb1 761 },
dd2d55f1 762 # Normal age-gate video (embed allowed)
c522adb1 763 {
2d3d2997 764 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
765 'info_dict': {
766 'id': 'HtVdAasjOgU',
767 'ext': 'mp4',
768 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 769 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 770 'duration': 142,
c522adb1
JMF
771 'uploader': 'The Witcher',
772 'uploader_id': 'WitcherGame',
ec85ded8 773 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 774 'upload_date': '20140605',
34952f09 775 'age_limit': 18,
c522adb1
JMF
776 },
777 },
8bdd16b4 778 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
779 # YouTube Red ad is not captured for creator
780 {
781 'url': '__2ABJjxzNo',
782 'info_dict': {
783 'id': '__2ABJjxzNo',
784 'ext': 'mp4',
785 'duration': 266,
786 'upload_date': '20100430',
787 'uploader_id': 'deadmau5',
788 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 789 'creator': 'deadmau5',
790 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 791 'uploader': 'deadmau5',
792 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 793 'alt_title': 'Some Chords',
8bdd16b4 794 },
795 'expected_warnings': [
796 'DASH manifest missing',
797 ]
798 },
067aa17e 799 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
800 {
801 'url': 'lqQg6PlCWgI',
802 'info_dict': {
803 'id': 'lqQg6PlCWgI',
804 'ext': 'mp4',
556dbe7f 805 'duration': 6085,
90227264 806 'upload_date': '20150827',
cbe2bd91 807 'uploader_id': 'olympic',
ec85ded8 808 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 809 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 810 'uploader': 'Olympic',
cbe2bd91
PH
811 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
812 },
813 'params': {
814 'skip_download': 'requires avconv',
e52a40ab 815 }
cbe2bd91 816 },
6271f1ca
PH
817 # Non-square pixels
818 {
819 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
820 'info_dict': {
821 'id': '_b-2C3KPAM0',
822 'ext': 'mp4',
823 'stretched_ratio': 16 / 9.,
556dbe7f 824 'duration': 85,
6271f1ca
PH
825 'upload_date': '20110310',
826 'uploader_id': 'AllenMeow',
ec85ded8 827 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 828 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 829 'uploader': '孫ᄋᄅ',
6271f1ca
PH
830 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
831 },
06b491eb
S
832 },
833 # url_encoded_fmt_stream_map is empty string
834 {
835 'url': 'qEJwOuvDf7I',
836 'info_dict': {
837 'id': 'qEJwOuvDf7I',
f57b7835 838 'ext': 'webm',
06b491eb
S
839 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
840 'description': '',
841 'upload_date': '20150404',
842 'uploader_id': 'spbelect',
843 'uploader': 'Наблюдатели Петербурга',
844 },
845 'params': {
846 'skip_download': 'requires avconv',
e323cf3f
S
847 },
848 'skip': 'This live event has ended.',
06b491eb 849 },
067aa17e 850 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
851 {
852 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
853 'info_dict': {
854 'id': 'FIl7x6_3R5Y',
eb6793ba 855 'ext': 'webm',
da77d856
S
856 'title': 'md5:7b81415841e02ecd4313668cde88737a',
857 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 858 'duration': 220,
da77d856
S
859 'upload_date': '20150625',
860 'uploader_id': 'dorappi2000',
ec85ded8 861 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 862 'uploader': 'dorappi2000',
eb6793ba 863 'formats': 'mincount:31',
da77d856 864 },
eb6793ba 865 'skip': 'not actual anymore',
2ee8f5d8 866 },
8a1a26ce
YCH
867 # DASH manifest with segment_list
868 {
869 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
870 'md5': '8ce563a1d667b599d21064e982ab9e31',
871 'info_dict': {
872 'id': 'CsmdDsKjzN8',
873 'ext': 'mp4',
17ee98e1 874 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
875 'uploader': 'Airtek',
876 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
877 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
878 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
879 },
880 'params': {
881 'youtube_include_dash_manifest': True,
882 'format': '135', # bestvideo
be49068d
S
883 },
884 'skip': 'This live event has ended.',
2ee8f5d8 885 },
cf7e015f
S
886 {
887 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 888 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 889 'info_dict': {
545cc85d 890 'id': 'jvGDaLqkpTg',
891 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
892 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
893 },
894 'playlist': [{
895 'info_dict': {
545cc85d 896 'id': 'jvGDaLqkpTg',
cf7e015f 897 'ext': 'mp4',
545cc85d 898 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
899 'description': 'md5:e03b909557865076822aa169218d6a5d',
900 'duration': 10643,
901 'upload_date': '20161111',
902 'uploader': 'Team PGP',
903 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
904 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
905 },
906 }, {
907 'info_dict': {
545cc85d 908 'id': '3AKt1R1aDnw',
cf7e015f 909 'ext': 'mp4',
545cc85d 910 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
911 'description': 'md5:e03b909557865076822aa169218d6a5d',
912 'duration': 10991,
913 'upload_date': '20161111',
914 'uploader': 'Team PGP',
915 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
916 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
917 },
918 }, {
919 'info_dict': {
545cc85d 920 'id': 'RtAMM00gpVc',
cf7e015f 921 'ext': 'mp4',
545cc85d 922 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
923 'description': 'md5:e03b909557865076822aa169218d6a5d',
924 'duration': 10995,
925 'upload_date': '20161111',
926 'uploader': 'Team PGP',
927 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
928 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
929 },
930 }, {
931 'info_dict': {
545cc85d 932 'id': '6N2fdlP3C5U',
cf7e015f 933 'ext': 'mp4',
545cc85d 934 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
935 'description': 'md5:e03b909557865076822aa169218d6a5d',
936 'duration': 10990,
937 'upload_date': '20161111',
938 'uploader': 'Team PGP',
939 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
940 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
941 },
942 }],
943 'params': {
944 'skip_download': True,
945 },
cbaed4bb 946 },
f9f49d87 947 {
067aa17e 948 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
949 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
950 'info_dict': {
951 'id': 'gVfLd0zydlo',
952 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
953 },
954 'playlist_count': 2,
be49068d 955 'skip': 'Not multifeed anymore',
f9f49d87 956 },
cbaed4bb 957 {
2d3d2997 958 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 959 'only_matching': True,
0e49d9a6 960 },
6d4fc66b 961 {
2d3d2997 962 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
963 'only_matching': True,
964 },
0e49d9a6 965 {
067aa17e 966 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 967 # Also tests cut-off URL expansion in video description (see
067aa17e
S
968 # https://github.com/ytdl-org/youtube-dl/issues/1892,
969 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
970 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
971 'info_dict': {
972 'id': 'lsguqyKfVQg',
973 'ext': 'mp4',
974 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 975 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 976 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 977 'duration': 133,
0e49d9a6
LL
978 'upload_date': '20151119',
979 'uploader_id': 'IronSoulElf',
ec85ded8 980 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 981 'uploader': 'IronSoulElf',
eb6793ba
S
982 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
983 'track': 'Dark Walk - Position Music',
984 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 985 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
986 },
987 'params': {
988 'skip_download': True,
989 },
990 },
61f92af1 991 {
067aa17e 992 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
993 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
994 'only_matching': True,
995 },
313dfc45
LL
996 {
997 # Video with yt:stretch=17:0
998 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
999 'info_dict': {
1000 'id': 'Q39EVAstoRM',
1001 'ext': 'mp4',
1002 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1003 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1004 'upload_date': '20151107',
1005 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1006 'uploader': 'CH GAMER DROID',
1007 },
1008 'params': {
1009 'skip_download': True,
1010 },
be49068d 1011 'skip': 'This video does not exist.',
313dfc45 1012 },
201c1459 1013 {
1014 # Video with incomplete 'yt:stretch=16:'
1015 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1016 'only_matching': True,
1017 },
7caf9830
S
1018 {
1019 # Video licensed under Creative Commons
1020 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1021 'info_dict': {
1022 'id': 'M4gD1WSo5mA',
1023 'ext': 'mp4',
1024 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1025 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1026 'duration': 721,
7caf9830
S
1027 'upload_date': '20150127',
1028 'uploader_id': 'BerkmanCenter',
ec85ded8 1029 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1030 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1031 'license': 'Creative Commons Attribution license (reuse allowed)',
1032 },
1033 'params': {
1034 'skip_download': True,
1035 },
1036 },
fd050249
S
1037 {
1038 # Channel-like uploader_url
1039 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1040 'info_dict': {
1041 'id': 'eQcmzGIKrzg',
1042 'ext': 'mp4',
1043 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1044 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1045 'duration': 4060,
fd050249 1046 'upload_date': '20151119',
eb6793ba 1047 'uploader': 'Bernie Sanders',
fd050249 1048 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1049 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1050 'license': 'Creative Commons Attribution license (reuse allowed)',
1051 },
1052 'params': {
1053 'skip_download': True,
1054 },
1055 },
040ac686
S
1056 {
1057 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1058 'only_matching': True,
7f29cf54
S
1059 },
1060 {
067aa17e 1061 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1062 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1063 'only_matching': True,
6496ccb4
S
1064 },
1065 {
1066 # Rental video preview
1067 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1068 'info_dict': {
1069 'id': 'uGpuVWrhIzE',
1070 'ext': 'mp4',
1071 'title': 'Piku - Trailer',
1072 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1073 'upload_date': '20150811',
1074 'uploader': 'FlixMatrix',
1075 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1076 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1077 'license': 'Standard YouTube License',
1078 },
1079 'params': {
1080 'skip_download': True,
1081 },
eb6793ba 1082 'skip': 'This video is not available.',
022a5d66 1083 },
12afdc2a
S
1084 {
1085 # YouTube Red video with episode data
1086 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1087 'info_dict': {
1088 'id': 'iqKdEhx-dD4',
1089 'ext': 'mp4',
1090 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1091 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1092 'duration': 2085,
12afdc2a
S
1093 'upload_date': '20170118',
1094 'uploader': 'Vsauce',
1095 'uploader_id': 'Vsauce',
1096 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1097 'series': 'Mind Field',
1098 'season_number': 1,
1099 'episode_number': 1,
1100 },
1101 'params': {
1102 'skip_download': True,
1103 },
1104 'expected_warnings': [
1105 'Skipping DASH manifest',
1106 ],
1107 },
c7121fa7
S
1108 {
1109 # The following content has been identified by the YouTube community
1110 # as inappropriate or offensive to some audiences.
1111 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1112 'info_dict': {
1113 'id': '6SJNVb0GnPI',
1114 'ext': 'mp4',
1115 'title': 'Race Differences in Intelligence',
1116 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1117 'duration': 965,
1118 'upload_date': '20140124',
1119 'uploader': 'New Century Foundation',
1120 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1121 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1122 },
1123 'params': {
1124 'skip_download': True,
1125 },
545cc85d 1126 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1127 },
022a5d66
S
1128 {
1129 # itag 212
1130 'url': '1t24XAntNCY',
1131 'only_matching': True,
fd5c4aab
S
1132 },
1133 {
1134 # geo restricted to JP
1135 'url': 'sJL6WA-aGkQ',
1136 'only_matching': True,
1137 },
cd5a74a2
S
1138 {
1139 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1140 'only_matching': True,
1141 },
bc2ca1bb 1142 {
1143 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1144 'only_matching': True,
1145 },
1146 {
1147 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1148 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1149 'only_matching': True,
1150 },
825cd268
RA
1151 {
1152 # DRM protected
1153 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1154 'only_matching': True,
4fe54c12
S
1155 },
1156 {
1157 # Video with unsupported adaptive stream type formats
1158 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1159 'info_dict': {
1160 'id': 'Z4Vy8R84T1U',
1161 'ext': 'mp4',
1162 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1163 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1164 'duration': 433,
1165 'upload_date': '20130923',
1166 'uploader': 'Amelia Putri Harwita',
1167 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1168 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1169 'formats': 'maxcount:10',
1170 },
1171 'params': {
1172 'skip_download': True,
1173 'youtube_include_dash_manifest': False,
1174 },
5429d6a9 1175 'skip': 'not actual anymore',
5caabd3c 1176 },
1177 {
822b9d9c 1178 # Youtube Music Auto-generated description
5caabd3c 1179 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1180 'info_dict': {
1181 'id': 'MgNrAu2pzNs',
1182 'ext': 'mp4',
1183 'title': 'Voyeur Girl',
1184 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1185 'upload_date': '20190312',
5429d6a9
S
1186 'uploader': 'Stephen - Topic',
1187 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1188 'artist': 'Stephen',
1189 'track': 'Voyeur Girl',
1190 'album': 'it\'s too much love to know my dear',
1191 'release_date': '20190313',
1192 'release_year': 2019,
1193 },
1194 'params': {
1195 'skip_download': True,
1196 },
1197 },
66b48727
RA
1198 {
1199 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1200 'only_matching': True,
1201 },
011e75e6
S
1202 {
1203 # invalid -> valid video id redirection
1204 'url': 'DJztXj2GPfl',
1205 'info_dict': {
1206 'id': 'DJztXj2GPfk',
1207 'ext': 'mp4',
1208 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1209 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1210 'upload_date': '20090125',
1211 'uploader': 'Prochorowka',
1212 'uploader_id': 'Prochorowka',
1213 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1214 'artist': 'Panjabi MC',
1215 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1216 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1217 },
1218 'params': {
1219 'skip_download': True,
1220 },
545cc85d 1221 'skip': 'Video unavailable',
ea74e00b
DP
1222 },
1223 {
1224 # empty description results in an empty string
1225 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1226 'info_dict': {
1227 'id': 'x41yOUIvK2k',
1228 'ext': 'mp4',
1229 'title': 'IMG 3456',
1230 'description': '',
1231 'upload_date': '20170613',
1232 'uploader_id': 'ElevageOrVert',
1233 'uploader': 'ElevageOrVert',
1234 },
1235 'params': {
1236 'skip_download': True,
1237 },
1238 },
a0566bbf 1239 {
29f7c58a 1240 # with '};' inside yt initial data (see [1])
1241 # see [2] for an example with '};' inside ytInitialPlayerResponse
1242 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1243 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1244 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1245 'info_dict': {
1246 'id': 'CHqg6qOn4no',
1247 'ext': 'mp4',
1248 'title': 'Part 77 Sort a list of simple types in c#',
1249 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1250 'upload_date': '20130831',
1251 'uploader_id': 'kudvenkat',
1252 'uploader': 'kudvenkat',
1253 },
1254 'params': {
1255 'skip_download': True,
1256 },
1257 },
29f7c58a 1258 {
1259 # another example of '};' in ytInitialData
1260 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1261 'only_matching': True,
1262 },
1263 {
1264 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1265 'only_matching': True,
1266 },
545cc85d 1267 {
cc2db878 1268 # https://github.com/ytdl-org/youtube-dl/pull/28094
1269 'url': 'OtqTfy26tG0',
1270 'info_dict': {
1271 'id': 'OtqTfy26tG0',
1272 'ext': 'mp4',
1273 'title': 'Burn Out',
1274 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1275 'upload_date': '20141120',
1276 'uploader': 'The Cinematic Orchestra - Topic',
1277 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1278 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1279 'artist': 'The Cinematic Orchestra',
1280 'track': 'Burn Out',
1281 'album': 'Every Day',
1282 'release_data': None,
1283 'release_year': None,
1284 },
1285 'params': {
1286 'skip_download': True,
1287 },
545cc85d 1288 },
bc2ca1bb 1289 {
1290 # controversial video, only works with bpctr when authenticated with cookies
1291 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1292 'only_matching': True,
1293 },
f7ad7160 1294 {
1295 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1296 'url': 'cBvYw8_A0vQ',
1297 'info_dict': {
1298 'id': 'cBvYw8_A0vQ',
1299 'ext': 'mp4',
1300 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1301 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1302 'upload_date': '20201120',
1303 'uploader': 'Walk around Japan',
1304 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1305 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1306 },
1307 'params': {
1308 'skip_download': True,
1309 },
0fb983f6 1310 }, {
1311 # Has multiple audio streams
1312 'url': 'WaOKSUlf4TM',
1313 'only_matching': True
9297939e 1314 }, {
1315 # Requires Premium: has format 141 when requested using YTM url
1316 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1317 'only_matching': True
1318 }, {
120916da 1319 # multiple subtitles with same lang_code
1320 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1321 'only_matching': True,
1322 },
2eb88d95
PH
1323 ]
1324
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; for every other URL the decision is delegated to the
    parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if not list_values[0]:
        return super(YoutubeIE, cls).suitable(url)
    return False
1334
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and start with empty caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, keyed by (player URL, signature cache id)
    self._player_cache = {}
    # Downloaded player code, keyed by player id
    self._code_cache = {}
e0df6211 1339
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the lengths of the dot-separated parts of a signature, joined by dots"""
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1343
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern found in *player_url*.

    Raises ExtractorError when none of the patterns matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match is not None:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c
S
1353
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature string.

    The callable comes either from a previously stored index list in the
    downloader cache, or from parsing the player code (downloaded at most
    once per player id). In the latter case the index list describing the
    transformation is stored in the cache before returning.
    """
    player_id = self._extract_player_info(player_url)
    sig_shape = self._signature_cache_id(example_sig)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, sig_shape)
    # func_id must be a bare name without any directory component
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        def apply_stored_spec(s):
            return ''.join(s[i] for i in stored_spec)
        return apply_stored_spec

    if player_id in self._code_cache:
        code = self._code_cache[player_id]
    else:
        code = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
        self._code_cache[player_id] = code
    sig_func = self._parse_sig_js(code)

    # Feed a probe string whose i-th character has code point i, so the
    # code points of the output are the source indices used by sig_func
    probe = ''.join(compat_chr(n) for n in range(len(example_sig)))
    index_spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
1380
def _print_sig_code(self, func, example_sig):
    """Print Python code reproducing *func* for signatures shaped like *example_sig*.

    *func* is run on a probe string to learn which input index ends up at
    each output position; runs of consecutive indices (step +1 or -1) are
    emitted as slices, the rest as single subscripts.
    """

    def slice_expr(first, last, stride):
        # Render s[first:last+stride:stride], dropping redundant parts
        lower = str(first) if first != 0 else ''
        stop = last + stride
        upper = (':%d' % stop) if stop >= 0 else ':'
        stride_part = (':%d' % stride) if stride != 1 else ''
        return 's[%s%s%s]' % (lower, upper, stride_part)

    def sig_expressions(indices):
        stride = None
        # Quell pyflakes warnings - run_start will be set when stride is set
        run_start = '(Never used)'
        for cur, before in zip(indices[1:], indices[:-1]):
            delta = cur - before
            if stride is None:
                if delta in (-1, 1):
                    # A new run of consecutive indices begins at `before`
                    stride, run_start = delta, before
                else:
                    yield 's[%d]' % before
            elif delta != stride:
                # The current run ended at `before`
                yield slice_expr(run_start, before, stride)
                stride = None
        # Flush whatever is pending for the final index
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield slice_expr(run_start, cur, stride)

    probe = ''.join(compat_chr(n) for n in range(len(example_sig)))
    index_spec = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(sig_expressions(index_spec))
    part_lengths = [compat_str(len(p)) for p in example_sig.split('.')]
    signature_id_tuple = '(%s)' % ', '.join(part_lengths)
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1419
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable wrapping the signature function found in *jscode*.

    The function name is located with the regex list below (tried through
    ``_search_regex``), the function is then extracted with JSInterpreter.
    The returned callable passes its single argument to that function as a
    one-element argument list.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        return initial_function([s])

    return run_signature_function
1443
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature

    Raises ExtractorError when player_url is None, or (wrapping the
    original exception and its traceback) when anything fails while
    obtaining or applying the signature function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Normalise protocol-relative and relative player URLs to absolute ones
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif re.match(r'https?://', player_url) is None:
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key in self._player_cache:
            sig_func = self._player_cache[cache_key]
        else:
            sig_func = self._extract_signature_function(
                video_id, player_url, s)
            self._player_cache[cache_key] = sig_func
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        trace_text = traceback.format_exc()
        raise ExtractorError(
            'Signature extraction failed: ' + trace_text, cause=e)
e0df6211 1470
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in *player_response*.

    Does nothing when the response carries no valid
    ``playbackTracking.videostatsPlaybackUrl.baseUrl``. Otherwise the URL
    is fetched (non-fatally) with its ``ver`` and ``cpn`` query parameters
    overridden.
    """
    def tracking_base_url(x):
        return x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']

    playback_url = url_or_none(try_get(player_response, tracking_base_url))
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    qs['ver'] = ['2']
    qs['cpn'] = [''.join(cpn_chars)]

    new_query = compat_urllib_parse_urlencode(qs, True)
    playback_url = compat_urlparse.urlunparse(
        url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1495
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return a list of YouTube embed URLs / video IDs found in *webpage*.

    Three sources are scanned, in this order: generic embedded players
    (iframe/embed/object/SWFObject/embedSWF/data-video-url), lazyYT
    placeholders, and the Wordpress "YouTube Video Importer" plugin.
    The result may be empty.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read `embedSWF\(?:\s*`, which
    # parses as an optional "(" followed by a mandatory ":" and therefore
    # never matched a real `embedSWF("...")` call.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields (q1, q2, video_id) tuples; the ID is the last group
    entries.extend(m[-1] for m in matches)

    return entries
1527
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``YoutubeIE._extract_urls``, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1532
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second capture group of ``cls._VALID_URL`` matched on ``url``.

    The pattern is applied in verbose mode. Raises ExtractorError when the
    URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1540
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build the chapter list from the chaptered player bar in ``data``.

    Each chapter ends where the following one starts; the last chapter ends
    at ``duration``. Chapters whose start or end time cannot be determined
    are left out. Returns None when ``data`` carries no chapter list,
    otherwise a list of dicts with ``start_time``, ``end_time`` and
    ``title``.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_of(item):
        # timeRangeStartMillis is in milliseconds; convert to seconds.
        millis = try_get(
            item,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for index, item in enumerate(raw_chapters):
        begin = start_of(item)
        if begin is None:
            continue
        following = index + 1
        finish = start_of(raw_chapters[following]) if following < total else duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                item, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1580
545cc85d 1581 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1582 return self._parse_json(self._search_regex(
1583 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1584 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1585
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    The first two space-separated words are handed to datetime_from_str as
    'now-<X><units>'. Returns None when the text has fewer than three words.
    """
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    amount, unit = words[0], words[1]
    return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
1595
a1c5d2ca
M
1596 @staticmethod
1597 def _join_text_entries(runs):
1598 text = None
1599 for run in runs:
1600 if not isinstance(run, dict):
1601 continue
1602 sub_text = try_get(run, lambda x: x['text'], compat_str)
1603 if sub_text:
1604 if not text:
1605 text = sub_text
1606 continue
1607 text += sub_text
1608 return text
1609
1610 def _extract_comment(self, comment_renderer, parent=None):
1611 comment_id = comment_renderer.get('commentId')
1612 if not comment_id:
1613 return
1614 comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
1615 text = self._join_text_entries(comment_text_runs) or ''
1616 comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
1617 time_text = self._join_text_entries(comment_time_text)
d92f5d5a 1618 timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
a1c5d2ca
M
1619 author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
1620 author_id = try_get(comment_renderer,
1621 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
1622 votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
1623 lambda x: x['likeCount']), compat_str)) or 0
1624 author_thumbnail = try_get(comment_renderer,
1625 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
1626
1627 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
1628 is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
a1c5d2ca
M
1629 return {
1630 'id': comment_id,
1631 'text': text,
d92f5d5a 1632 'timestamp': timestamp,
a1c5d2ca
M
1633 'time_text': time_text,
1634 'like_count': votes,
1635 'is_favorited': is_liked,
1636 'author': author,
1637 'author_id': author_id,
1638 'author_thumbnail': author_thumbnail,
1639 'author_is_uploader': author_is_uploader,
1640 'parent': parent or 'root'
1641 }
1642
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generate comments by following comment continuation pages.

    Yields the dicts produced by ``_extract_comment``; replies are fetched
    through a recursive call with ``parent`` set to the id of the comment
    they belong to. A root-level call (``parent is None``) may additionally
    yield one int, the expected total number of comments, before any
    comment.

    ``session_token_list`` is a one-element list holding the session token;
    it is updated in place whenever a response carries a new ``xsrf_token``.
    ``comment_counts`` is a shared three-element list:
    [comments so far, est. total comments, current comment thread #].
    Each page request is retried up to the ``extractor_retries`` param
    (default 3). Raises ExtractorError when retries are exhausted or the
    server reports an error; an HTTP 413 is treated as the end of comments.
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # ``dict`` is the expected type of the result. It used to sit
            # inside the getter tuple, where it acted as a getter itself and
            # returned a copy of the source dict when the lookup failed.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self.get_param('extractor_retries', 3)
        count = -1
        last_error = None

        # Every path through this loop ends in break, return, continue or
        # raise, so ``response`` is always bound once the loop is left.
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                # The count text may not be numeric. Announce and yield the
                # estimate only when it converts, so that neither the '%d'
                # formatting fails nor None is yielded as if it were a
                # comment.
                expected_total = str_to_int(expected_comment_count) if expected_comment_count else None
                if expected_total is not None:
                    comment_counts[1] = expected_total
                    self.to_screen('Downloading ~%d comments' % expected_total)
                    yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    # Restart from the sorted continuation without consuming
                    # the entries of this page.
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
1827
1828 def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
1829 """Entry for comment extraction"""
1830 comments = []
1831 known_entry_comment_renderers = (
1832 'itemSectionRenderer',
1833 )
1834 estimated_total = 0
1835 for entry in contents:
1836 for key, renderer in entry.items():
1837 if key not in known_entry_comment_renderers:
1838 continue
1839
1840 comment_iter = self._comment_entries(
1841 renderer,
1842 identity_token=self._extract_identity_token(webpage, item_id=video_id),
1843 account_syncid=self._extract_account_syncid(ytcfg),
f4f751af 1844 ytcfg=ytcfg,
a1c5d2ca
M
1845 session_token_list=[xsrf_token])
1846
1847 for comment in comment_iter:
1848 if isinstance(comment, int):
1849 estimated_total = comment
1850 continue
1851 comments.append(comment)
1852 break
d92f5d5a 1853 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
a1c5d2ca
M
1854 return {
1855 'comments': comments,
1856 'comment_count': len(comments),
1857 }
1858
c5e8d7af 1859 def _real_extract(self, url):
cf7e015f 1860 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1861 video_id = self._match_id(url)
9297939e 1862
1863 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
1864
545cc85d 1865 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1866 webpage_url = base_url + 'watch?v=' + video_id
1867 webpage = self._download_webpage(
cce889b9 1868 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1869
9297939e 1870 def get_text(x):
1871 if not x:
1872 return
1873 text = x.get('simpleText')
1874 if text and isinstance(text, compat_str):
1875 return text
1876 runs = x.get('runs')
1877 if not isinstance(runs, list):
1878 return
1879 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
1880
1881 ytm_streaming_data = {}
1882 if is_music_url:
1883 # we are forcing to use parse_json because 141 only appeared in get_video_info.
1884 # el, c, cver, cplayer field required for 141(aac 256kbps) codec
1885 # maybe parameter of youtube music player?
1886 ytm_player_response = self._parse_json(try_get(compat_parse_qs(
1887 self._download_webpage(
1888 base_url + 'get_video_info', video_id,
fe03a6cd 1889 'Fetching youtube music info webpage',
1890 'unable to download youtube music info webpage', query={
9297939e 1891 'video_id': video_id,
1892 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1893 'el': 'detailpage',
1894 'c': 'WEB_REMIX',
1895 'cver': '0.1',
1896 'cplayer': 'UNIPLAYER'
1897 }, fatal=False)),
1898 lambda x: x['player_response'][0],
1899 compat_str) or '{}', video_id)
1900 ytm_streaming_data = ytm_player_response.get('streamingData') or {}
1901
545cc85d 1902 player_response = None
1903 if webpage:
1904 player_response = self._extract_yt_initial_variable(
1905 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1906 video_id, 'initial player response')
f4f751af 1907
1908 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1909 if not player_response:
1910 player_response = self._call_api(
f4f751af 1911 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1912
1913 playability_status = player_response.get('playabilityStatus') or {}
1914 if playability_status.get('reason') == 'Sign in to confirm your age':
1915 pr = self._parse_json(try_get(compat_parse_qs(
1916 self._download_webpage(
1917 base_url + 'get_video_info', video_id,
1918 'Refetching age-gated info webpage',
1919 'unable to download video info webpage', query={
1920 'video_id': video_id,
7c60c33e 1921 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1922 }, fatal=False)),
1923 lambda x: x['player_response'][0],
1924 compat_str) or '{}', video_id)
1925 if pr:
1926 player_response = pr
1927
1928 trailer_video_id = try_get(
1929 playability_status,
1930 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1931 compat_str)
1932 if trailer_video_id:
1933 return self.url_result(
1934 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1935
545cc85d 1936 search_meta = (
1937 lambda x: self._html_search_meta(x, webpage, default=None)) \
1938 if webpage else lambda x: None
dbdaaa23 1939
545cc85d 1940 video_details = player_response.get('videoDetails') or {}
37357d21 1941 microformat = try_get(
545cc85d 1942 player_response,
1943 lambda x: x['microformat']['playerMicroformatRenderer'],
1944 dict) or {}
1945 video_title = video_details.get('title') \
1946 or get_text(microformat.get('title')) \
1947 or search_meta(['og:title', 'twitter:title', 'title'])
1948 video_description = video_details.get('shortDescription')
cf7e015f 1949
8fe10494 1950 if not smuggled_data.get('force_singlefeed', False):
a06916d9 1951 if not self.get_param('noplaylist'):
8fe10494
S
1952 multifeed_metadata_list = try_get(
1953 player_response,
1954 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1955 compat_str)
8fe10494
S
1956 if multifeed_metadata_list:
1957 entries = []
1958 feed_ids = []
1959 for feed in multifeed_metadata_list.split(','):
1960 # Unquote should take place before split on comma (,) since textual
1961 # fields may contain comma as well (see
067aa17e 1962 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1963 feed_data = compat_parse_qs(
1964 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1965
1966 def feed_entry(name):
545cc85d 1967 return try_get(
1968 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1969
1970 feed_id = feed_entry('id')
1971 if not feed_id:
1972 continue
1973 feed_title = feed_entry('title')
1974 title = video_title
1975 if feed_title:
1976 title += ' (%s)' % feed_title
8fe10494
S
1977 entries.append({
1978 '_type': 'url_transparent',
1979 'ie_key': 'Youtube',
1980 'url': smuggle_url(
545cc85d 1981 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1982 {'force_singlefeed': True}),
6b09401b 1983 'title': title,
8fe10494 1984 })
6b09401b 1985 feed_ids.append(feed_id)
8fe10494
S
1986 self.to_screen(
1987 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1988 % (', '.join(feed_ids), video_id))
545cc85d 1989 return self.playlist_result(
1990 entries, video_id, video_title, video_description)
8fe10494
S
1991 else:
1992 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1993
9297939e 1994 formats, itags, stream_ids = [], [], []
cc2db878 1995 itag_qualities = {}
545cc85d 1996 player_url = None
d3fc8074 1997 q = qualities([
1998 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
1999 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2000 ])
9297939e 2001
545cc85d 2002 streaming_data = player_response.get('streamingData') or {}
2003 streaming_formats = streaming_data.get('formats') or []
2004 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2005 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2006 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2007
545cc85d 2008 for fmt in streaming_formats:
2009 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2010 continue
321bf820 2011
cc2db878 2012 itag = str_or_none(fmt.get('itag'))
9297939e 2013 audio_track = fmt.get('audioTrack') or {}
2014 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2015 if stream_id in stream_ids:
2016 continue
2017
cc2db878 2018 quality = fmt.get('quality')
d3fc8074 2019 if quality == 'tiny' or not quality:
2020 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2021 if itag and quality:
2022 itag_qualities[itag] = quality
2023 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2024 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2025 # number of fragment that would subsequently requested with (`&sq=N`)
2026 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2027 continue
2028
545cc85d 2029 fmt_url = fmt.get('url')
2030 if not fmt_url:
2031 sc = compat_parse_qs(fmt.get('signatureCipher'))
2032 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2033 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2034 if not (sc and fmt_url and encrypted_sig):
2035 continue
2036 if not player_url:
2037 if not webpage:
2038 continue
2039 player_url = self._search_regex(
2040 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
2041 webpage, 'player URL', fatal=False)
2042 if not player_url:
201e9eaa 2043 continue
545cc85d 2044 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2045 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2046 fmt_url += '&' + sp + '=' + signature
2047
545cc85d 2048 if itag:
2049 itags.append(itag)
9297939e 2050 stream_ids.append(stream_id)
2051
cc2db878 2052 tbr = float_or_none(
2053 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2054 dct = {
2055 'asr': int_or_none(fmt.get('audioSampleRate')),
2056 'filesize': int_or_none(fmt.get('contentLength')),
2057 'format_id': itag,
0fb983f6 2058 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2059 'fps': int_or_none(fmt.get('fps')),
2060 'height': int_or_none(fmt.get('height')),
dca3ff4a 2061 'quality': q(quality),
cc2db878 2062 'tbr': tbr,
545cc85d 2063 'url': fmt_url,
2064 'width': fmt.get('width'),
0fb983f6 2065 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2066 }
2067 mimetype = fmt.get('mimeType')
2068 if mimetype:
2069 mobj = re.match(
2070 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2071 if mobj:
2072 dct['ext'] = mimetype2ext(mobj.group(1))
2073 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2074 no_audio = dct.get('acodec') == 'none'
2075 no_video = dct.get('vcodec') == 'none'
2076 if no_audio:
2077 dct['vbr'] = tbr
2078 if no_video:
2079 dct['abr'] = tbr
2080 if no_audio or no_video:
545cc85d 2081 dct['downloader_options'] = {
2082 # Youtube throttles chunks >~10M
2083 'http_chunk_size': 10485760,
bf1317d2 2084 }
7c60c33e 2085 if dct.get('ext'):
2086 dct['container'] = dct['ext'] + '_dash'
545cc85d 2087 formats.append(dct)
2088
9297939e 2089 for sd in (streaming_data, ytm_streaming_data):
2090 hls_manifest_url = sd.get('hlsManifestUrl')
2091 if hls_manifest_url:
2092 for f in self._extract_m3u8_formats(
2093 hls_manifest_url, video_id, 'mp4', fatal=False):
2094 itag = self._search_regex(
2095 r'/itag/(\d+)', f['url'], 'itag', default=None)
2096 if itag:
2097 f['format_id'] = itag
545cc85d 2098 formats.append(f)
2099
a06916d9 2100 if self.get_param('youtube_include_dash_manifest', True):
9297939e 2101 for sd in (streaming_data, ytm_streaming_data):
2102 dash_manifest_url = sd.get('dashManifestUrl')
2103 if dash_manifest_url:
2104 for f in self._extract_mpd_formats(
2105 dash_manifest_url, video_id, fatal=False):
2106 itag = f['format_id']
2107 if itag in itags:
2108 continue
2109 if itag in itag_qualities:
9297939e 2110 f['quality'] = q(itag_qualities[itag])
2111 filesize = int_or_none(self._search_regex(
2112 r'/clen/(\d+)', f.get('fragment_base_url')
2113 or f['url'], 'file size', default=None))
2114 if filesize:
2115 f['filesize'] = filesize
2116 formats.append(f)
bf1317d2 2117
545cc85d 2118 if not formats:
a06916d9 2119 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2120 self.raise_no_formats(
545cc85d 2121 'This video is DRM protected.', expected=True)
2122 pemr = try_get(
2123 playability_status,
2124 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2125 dict) or {}
2126 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2127 subreason = pemr.get('subreason')
2128 if subreason:
2129 subreason = clean_html(get_text(subreason))
2130 if subreason == 'The uploader has not made this video available in your country.':
2131 countries = microformat.get('availableCountries')
2132 if not countries:
2133 regions_allowed = search_meta('regionsAllowed')
2134 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2135 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2136 reason += '\n' + subreason
2137 if reason:
b7da73eb 2138 self.raise_no_formats(reason, expected=True)
bf1317d2 2139
545cc85d 2140 self._sort_formats(formats)
bf1317d2 2141
545cc85d 2142 keywords = video_details.get('keywords') or []
2143 if not keywords and webpage:
2144 keywords = [
2145 unescapeHTML(m.group('content'))
2146 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2147 for keyword in keywords:
2148 if keyword.startswith('yt:stretch='):
201c1459 2149 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2150 if mobj:
2151 # NB: float is intentional for forcing float division
2152 w, h = (float(v) for v in mobj.groups())
2153 if w > 0 and h > 0:
2154 ratio = w / h
2155 for f in formats:
2156 if f.get('vcodec') != 'none':
2157 f['stretched_ratio'] = ratio
2158 break
6449cd80 2159
545cc85d 2160 thumbnails = []
2161 for container in (video_details, microformat):
2162 for thumbnail in (try_get(
2163 container,
2164 lambda x: x['thumbnail']['thumbnails'], list) or []):
2165 thumbnail_url = thumbnail.get('url')
2166 if not thumbnail_url:
bf1317d2 2167 continue
1988fab7 2168 # Sometimes youtube gives a wrong thumbnail URL. See:
2169 # https://github.com/yt-dlp/yt-dlp/issues/233
2170 # https://github.com/ytdl-org/youtube-dl/issues/28023
2171 if 'maxresdefault' in thumbnail_url:
2172 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2173 thumbnails.append({
545cc85d 2174 'url': thumbnail_url,
ff2751ac 2175 'height': int_or_none(thumbnail.get('height')),
545cc85d 2176 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2177 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2178 })
ff2751ac 2179 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2180 if thumbnail_url:
2181 thumbnails.append({
2182 'url': thumbnail_url,
2183 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2184 })
2185 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2186 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2187 thumbnails.append({
2188 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2189 'preference': 1,
2190 })
2191 self._remove_duplicate_formats(thumbnails)
545cc85d 2192
2193 category = microformat.get('category') or search_meta('genre')
2194 channel_id = video_details.get('channelId') \
2195 or microformat.get('externalChannelId') \
2196 or search_meta('channelId')
2197 duration = int_or_none(
2198 video_details.get('lengthSeconds')
2199 or microformat.get('lengthSeconds')) \
2200 or parse_duration(search_meta('duration'))
2201 is_live = video_details.get('isLive')
2202 owner_profile_url = microformat.get('ownerProfileUrl')
2203
2204 info = {
2205 'id': video_id,
2206 'title': self._live_title(video_title) if is_live else video_title,
2207 'formats': formats,
2208 'thumbnails': thumbnails,
2209 'description': video_description,
2210 'upload_date': unified_strdate(
2211 microformat.get('uploadDate')
2212 or search_meta('uploadDate')),
2213 'uploader': video_details['author'],
2214 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2215 'uploader_url': owner_profile_url,
2216 'channel_id': channel_id,
2217 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2218 'duration': duration,
2219 'view_count': int_or_none(
2220 video_details.get('viewCount')
2221 or microformat.get('viewCount')
2222 or search_meta('interactionCount')),
2223 'average_rating': float_or_none(video_details.get('averageRating')),
2224 'age_limit': 18 if (
2225 microformat.get('isFamilySafe') is False
2226 or search_meta('isFamilyFriendly') == 'false'
2227 or search_meta('og:restrictions:age') == '18+') else 0,
2228 'webpage_url': webpage_url,
2229 'categories': [category] if category else None,
2230 'tags': keywords,
2231 'is_live': is_live,
2232 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2233 'was_live': video_details.get('isLiveContent'),
545cc85d 2234 }
b477fc13 2235
545cc85d 2236 pctr = try_get(
2237 player_response,
2238 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2239 subtitles = {}
2240 if pctr:
774d79cc 2241 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2242 lang_subs = container.setdefault(lang_code, [])
545cc85d 2243 for fmt in self._SUBTITLE_FORMATS:
2244 query.update({
2245 'fmt': fmt,
2246 })
2247 lang_subs.append({
2248 'ext': fmt,
2249 'url': update_url_query(base_url, query),
774d79cc 2250 'name': sub_name,
545cc85d 2251 })
7e72694b 2252
545cc85d 2253 for caption_track in (pctr.get('captionTracks') or []):
2254 base_url = caption_track.get('baseUrl')
2255 if not base_url:
2256 continue
2257 if caption_track.get('kind') != 'asr':
120916da 2258 lang_code = (
2259 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2260 or caption_track.get('languageCode'))
545cc85d 2261 if not lang_code:
2262 continue
2263 process_language(
774d79cc 2264 subtitles, base_url, lang_code,
2265 try_get(caption_track, lambda x: x.get('name').get('simpleText')),
2266 {})
545cc85d 2267 continue
2268 automatic_captions = {}
2269 for translation_language in (pctr.get('translationLanguages') or []):
2270 translation_language_code = translation_language.get('languageCode')
2271 if not translation_language_code:
2272 continue
2273 process_language(
2274 automatic_captions, base_url, translation_language_code,
774d79cc 2275 try_get(translation_language, lambda x: x['languageName']['simpleText']),
545cc85d 2276 {'tlang': translation_language_code})
2277 info['automatic_captions'] = automatic_captions
2278 info['subtitles'] = subtitles
7e72694b 2279
545cc85d 2280 parsed_url = compat_urllib_parse_urlparse(url)
2281 for component in [parsed_url.fragment, parsed_url.query]:
2282 query = compat_parse_qs(component)
2283 for k, v in query.items():
2284 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2285 d_k += '_time'
2286 if d_k not in info and k in s_ks:
2287 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2288
2289 # Youtube Music Auto-generated description
822b9d9c 2290 if video_description:
38d70284 2291 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2292 if mobj:
822b9d9c
RA
2293 release_year = mobj.group('release_year')
2294 release_date = mobj.group('release_date')
2295 if release_date:
2296 release_date = release_date.replace('-', '')
2297 if not release_year:
545cc85d 2298 release_year = release_date[:4]
2299 info.update({
2300 'album': mobj.group('album'.strip()),
2301 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2302 'track': mobj.group('track').strip(),
2303 'release_date': release_date,
cc2db878 2304 'release_year': int_or_none(release_year),
545cc85d 2305 })
7e72694b 2306
545cc85d 2307 initial_data = None
2308 if webpage:
2309 initial_data = self._extract_yt_initial_variable(
2310 webpage, self._YT_INITIAL_DATA_RE, video_id,
2311 'yt initial data')
2312 if not initial_data:
2313 initial_data = self._call_api(
f4f751af 2314 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2315
2316 if not is_live:
2317 try:
2318 # This will error if there is no livechat
2319 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2320 info['subtitles']['live_chat'] = [{
394dcd44 2321 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2322 'video_id': video_id,
2323 'ext': 'json',
2324 'protocol': 'youtube_live_chat_replay',
2325 }]
2326 except (KeyError, IndexError, TypeError):
2327 pass
2328
2329 if initial_data:
2330 chapters = self._extract_chapters_from_json(
2331 initial_data, video_id, duration)
2332 if not chapters:
2333 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2334 contents = try_get(
2335 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2336 list)
2337 if not contents:
2338 continue
2339
2340 def chapter_time(mmlir):
2341 return parse_duration(
2342 get_text(mmlir.get('timeDescription')))
2343
2344 chapters = []
2345 for next_num, content in enumerate(contents, start=1):
2346 mmlir = content.get('macroMarkersListItemRenderer') or {}
2347 start_time = chapter_time(mmlir)
2348 end_time = chapter_time(try_get(
2349 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2350 if next_num < len(contents) else duration
2351 if start_time is None or end_time is None:
2352 continue
2353 chapters.append({
2354 'start_time': start_time,
2355 'end_time': end_time,
2356 'title': get_text(mmlir.get('title')),
2357 })
2358 if chapters:
2359 break
2360 if chapters:
2361 info['chapters'] = chapters
2362
2363 contents = try_get(
2364 initial_data,
2365 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2366 list) or []
2367 for content in contents:
2368 vpir = content.get('videoPrimaryInfoRenderer')
2369 if vpir:
2370 stl = vpir.get('superTitleLink')
2371 if stl:
2372 stl = get_text(stl)
2373 if try_get(
2374 vpir,
2375 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2376 info['location'] = stl
2377 else:
2378 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2379 if mobj:
2380 info.update({
2381 'series': mobj.group(1),
2382 'season_number': int(mobj.group(2)),
2383 'episode_number': int(mobj.group(3)),
2384 })
2385 for tlb in (try_get(
2386 vpir,
2387 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2388 list) or []):
2389 tbr = tlb.get('toggleButtonRenderer') or {}
2390 for getter, regex in [(
2391 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2392 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2393 lambda x: x['accessibility'],
2394 lambda x: x['accessibilityData']['accessibilityData'],
2395 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2396 label = (try_get(tbr, getter, dict) or {}).get('label')
2397 if label:
2398 mobj = re.match(regex, label)
2399 if mobj:
2400 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2401 break
2402 sbr_tooltip = try_get(
2403 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2404 if sbr_tooltip:
2405 like_count, dislike_count = sbr_tooltip.split(' / ')
2406 info.update({
2407 'like_count': str_to_int(like_count),
2408 'dislike_count': str_to_int(dislike_count),
2409 })
2410 vsir = content.get('videoSecondaryInfoRenderer')
2411 if vsir:
2412 info['channel'] = get_text(try_get(
2413 vsir,
2414 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2415 dict))
545cc85d 2416 rows = try_get(
2417 vsir,
2418 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2419 list) or []
2420 multiple_songs = False
2421 for row in rows:
2422 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2423 multiple_songs = True
2424 break
2425 for row in rows:
2426 mrr = row.get('metadataRowRenderer') or {}
2427 mrr_title = mrr.get('title')
2428 if not mrr_title:
2429 continue
2430 mrr_title = get_text(mrr['title'])
2431 mrr_contents_text = get_text(mrr['contents'][0])
2432 if mrr_title == 'License':
2433 info['license'] = mrr_contents_text
2434 elif not multiple_songs:
2435 if mrr_title == 'Album':
2436 info['album'] = mrr_contents_text
2437 elif mrr_title == 'Artist':
2438 info['artist'] = mrr_contents_text
2439 elif mrr_title == 'Song':
2440 info['track'] = mrr_contents_text
2441
2442 fallbacks = {
2443 'channel': 'uploader',
2444 'channel_id': 'uploader_id',
2445 'channel_url': 'uploader_url',
2446 }
2447 for to, frm in fallbacks.items():
2448 if not info.get(to):
2449 info[to] = info.get(frm)
2450
2451 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2452 v = info.get(s_k)
2453 if v:
2454 info[d_k] = v
b84071c0 2455
c224251a
M
2456 is_private = bool_or_none(video_details.get('isPrivate'))
2457 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2458 is_membersonly = None
b28f8d24 2459 is_premium = None
c224251a
M
2460 if initial_data and is_private is not None:
2461 is_membersonly = False
b28f8d24 2462 is_premium = False
c224251a
M
2463 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2464 for content in contents or []:
2465 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2466 for badge in badges or []:
2467 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2468 if label.lower() == 'members only':
2469 is_membersonly = True
2470 break
b28f8d24
M
2471 elif label.lower() == 'premium':
2472 is_premium = True
2473 break
2474 if is_membersonly or is_premium:
c224251a
M
2475 break
2476
2477 # TODO: Add this for playlists
2478 info['availability'] = self._availability(
2479 is_private=is_private,
b28f8d24 2480 needs_premium=is_premium,
c224251a
M
2481 needs_subscription=is_membersonly,
2482 needs_auth=info['age_limit'] >= 18,
2483 is_unlisted=None if is_private is None else is_unlisted)
2484
06167fbb 2485 # get xsrf for annotations or comments
a06916d9 2486 get_annotations = self.get_param('writeannotations', False)
2487 get_comments = self.get_param('getcomments', False)
06167fbb 2488 if get_annotations or get_comments:
29f7c58a 2489 xsrf_token = None
545cc85d 2490 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2491 if ytcfg:
2492 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2493 if not xsrf_token:
2494 xsrf_token = self._search_regex(
2495 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2496 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2497
2498 # annotations
06167fbb 2499 if get_annotations:
64b6a4e9
RA
2500 invideo_url = try_get(
2501 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2502 if xsrf_token and invideo_url:
29f7c58a 2503 xsrf_field_name = None
2504 if ytcfg:
2505 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2506 if not xsrf_field_name:
2507 xsrf_field_name = self._search_regex(
2508 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2509 webpage, 'xsrf field name',
29f7c58a 2510 group='xsrf_field_name', default='session_token')
8a784c74 2511 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2512 self._proto_relative_url(invideo_url),
2513 video_id, note='Downloading annotations',
2514 errnote='Unable to download video annotations', fatal=False,
2515 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2516
277d6ff5 2517 if get_comments:
a1c5d2ca 2518 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2519
545cc85d 2520 self.mark_watched(video_id, player_response)
d77ab8e2 2521
545cc85d 2522 return info
c5e8d7af 2523
5f6a1245 2524
8bdd16b4 2525class YoutubeTabIE(YoutubeBaseInfoExtractor):
2526 IE_DESC = 'YouTube.com tab'
70d5c17b 2527 _VALID_URL = r'''(?x)
2528 https?://
2529 (?:\w+\.)?
2530 (?:
2531 youtube(?:kids)?\.com|
2532 invidio\.us
2533 )/
2534 (?:
fe03a6cd 2535 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 2536 (?P<not_channel>
9ba5705a 2537 feed/|hashtag/|
70d5c17b 2538 (?:playlist|watch)\?.*?\blist=
2539 )|
29f7c58a 2540 (?!(?:%s)\b) # Direct URLs
70d5c17b 2541 )
2542 (?P<id>[^/?\#&]+)
2543 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2544 IE_NAME = 'youtube:tab'
2545
81127aa5 2546 _TESTS = [{
da692b79 2547 'note': 'playlists, multipage',
8bdd16b4 2548 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2549 'playlist_mincount': 94,
2550 'info_dict': {
2551 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2552 'title': 'Игорь Клейнер - Playlists',
2553 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2554 'uploader': 'Игорь Клейнер',
2555 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2556 },
2557 }, {
da692b79 2558 'note': 'playlists, multipage, different order',
8bdd16b4 2559 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2560 'playlist_mincount': 94,
2561 'info_dict': {
2562 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2563 'title': 'Игорь Клейнер - Playlists',
2564 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2565 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2566 'uploader': 'Игорь Клейнер',
8bdd16b4 2567 },
201c1459 2568 }, {
da692b79 2569 'note': 'playlists, series',
201c1459 2570 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2571 'playlist_mincount': 5,
2572 'info_dict': {
2573 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2574 'title': '3Blue1Brown - Playlists',
2575 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 2576 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2577 'uploader': '3Blue1Brown',
201c1459 2578 },
8bdd16b4 2579 }, {
da692b79 2580 'note': 'playlists, singlepage',
8bdd16b4 2581 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2582 'playlist_mincount': 4,
2583 'info_dict': {
2584 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2585 'title': 'ThirstForScience - Playlists',
2586 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2587 'uploader': 'ThirstForScience',
2588 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2589 }
2590 }, {
2591 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2592 'only_matching': True,
2593 }, {
da692b79 2594 'note': 'basic, single video playlist',
0e30a7b9 2595 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2596 'info_dict': {
0e30a7b9 2597 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2598 'uploader': 'Sergey M.',
2599 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2600 'title': 'youtube-dl public playlist',
81127aa5 2601 },
0e30a7b9 2602 'playlist_count': 1,
9291475f 2603 }, {
da692b79 2604 'note': 'empty playlist',
0e30a7b9 2605 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2606 'info_dict': {
0e30a7b9 2607 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2608 'uploader': 'Sergey M.',
2609 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2610 'title': 'youtube-dl empty playlist',
9291475f
PH
2611 },
2612 'playlist_count': 0,
2613 }, {
da692b79 2614 'note': 'Home tab',
8bdd16b4 2615 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2616 'info_dict': {
8bdd16b4 2617 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2618 'title': 'lex will - Home',
2619 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2620 'uploader': 'lex will',
2621 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2622 },
8bdd16b4 2623 'playlist_mincount': 2,
9291475f 2624 }, {
da692b79 2625 'note': 'Videos tab',
8bdd16b4 2626 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2627 'info_dict': {
8bdd16b4 2628 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2629 'title': 'lex will - Videos',
2630 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2631 'uploader': 'lex will',
2632 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2633 },
8bdd16b4 2634 'playlist_mincount': 975,
9291475f 2635 }, {
da692b79 2636 'note': 'Videos tab, sorted by popular',
8bdd16b4 2637 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2638 'info_dict': {
8bdd16b4 2639 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2640 'title': 'lex will - Videos',
2641 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2642 'uploader': 'lex will',
2643 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2644 },
8bdd16b4 2645 'playlist_mincount': 199,
9291475f 2646 }, {
da692b79 2647 'note': 'Playlists tab',
8bdd16b4 2648 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2649 'info_dict': {
8bdd16b4 2650 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2651 'title': 'lex will - Playlists',
2652 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2653 'uploader': 'lex will',
2654 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2655 },
8bdd16b4 2656 'playlist_mincount': 17,
ac7553d0 2657 }, {
da692b79 2658 'note': 'Community tab',
8bdd16b4 2659 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2660 'info_dict': {
8bdd16b4 2661 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2662 'title': 'lex will - Community',
2663 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2664 'uploader': 'lex will',
2665 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2666 },
2667 'playlist_mincount': 18,
87dadd45 2668 }, {
da692b79 2669 'note': 'Channels tab',
8bdd16b4 2670 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2671 'info_dict': {
8bdd16b4 2672 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2673 'title': 'lex will - Channels',
2674 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2675 'uploader': 'lex will',
2676 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2677 },
deaec5af 2678 'playlist_mincount': 12,
cd684175 2679 }, {
2680 'note': 'Search tab',
2681 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
2682 'playlist_mincount': 40,
2683 'info_dict': {
2684 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2685 'title': '3Blue1Brown - Search - linear algebra',
2686 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2687 'uploader': '3Blue1Brown',
2688 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2689 },
6b08cdf6 2690 }, {
a0566bbf 2691 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2692 'only_matching': True,
2693 }, {
a0566bbf 2694 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2695 'only_matching': True,
2696 }, {
a0566bbf 2697 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2698 'only_matching': True,
2699 }, {
2700 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2701 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2702 'info_dict': {
2703 'title': '29C3: Not my department',
2704 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2705 'uploader': 'Christiaan008',
2706 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2707 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2708 },
2709 'playlist_count': 96,
2710 }, {
2711 'note': 'Large playlist',
2712 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2713 'info_dict': {
8bdd16b4 2714 'title': 'Uploads from Cauchemar',
2715 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2716 'uploader': 'Cauchemar',
2717 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2718 },
8bdd16b4 2719 'playlist_mincount': 1123,
2720 }, {
da692b79 2721 'note': 'even larger playlist, 8832 videos',
8bdd16b4 2722 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2723 'only_matching': True,
4b7df0d3
JMF
2724 }, {
2725 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2726 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2727 'info_dict': {
acf757f4
PH
2728 'title': 'Uploads from Interstellar Movie',
2729 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2730 'uploader': 'Interstellar Movie',
8bdd16b4 2731 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2732 },
481cc733 2733 'playlist_mincount': 21,
358de58c 2734 }, {
2735 'note': 'Playlist with "show unavailable videos" button',
2736 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2737 'info_dict': {
2738 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2739 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2740 'uploader': 'Phim Siêu Nhân Nhật Bản',
2741 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2742 },
da692b79 2743 'playlist_mincount': 200,
5d342002 2744 }, {
da692b79 2745 'note': 'Playlist with unavailable videos in page 7',
5d342002 2746 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2747 'info_dict': {
2748 'title': 'Uploads from BlankTV',
2749 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2750 'uploader': 'BlankTV',
2751 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2752 },
da692b79 2753 'playlist_mincount': 1000,
8bdd16b4 2754 }, {
da692b79 2755 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 2756 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2757 'info_dict': {
2758 'title': 'Data Analysis with Dr Mike Pound',
2759 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2760 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2761 'uploader': 'Computerphile',
deaec5af 2762 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2763 },
2764 'playlist_mincount': 11,
2765 }, {
a0566bbf 2766 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2767 'only_matching': True,
dacb3a86 2768 }, {
da692b79 2769 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
2770 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2771 'info_dict': {
2772 'id': 'FqZTN594JQw',
2773 'ext': 'webm',
2774 'title': "Smiley's People 01 detective, Adventure Series, Action",
2775 'uploader': 'STREEM',
2776 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2777 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2778 'upload_date': '20150526',
2779 'license': 'Standard YouTube License',
2780 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2781 'categories': ['People & Blogs'],
2782 'tags': list,
dbdaaa23 2783 'view_count': int,
dacb3a86
S
2784 'like_count': int,
2785 'dislike_count': int,
2786 },
2787 'params': {
2788 'skip_download': True,
2789 },
13a75688 2790 'skip': 'This video is not available.',
dacb3a86 2791 'add_ie': [YoutubeIE.ie_key()],
481cc733 2792 }, {
8bdd16b4 2793 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2794 'only_matching': True,
66b48727 2795 }, {
8bdd16b4 2796 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2797 'only_matching': True,
a0566bbf 2798 }, {
2799 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2800 'info_dict': {
da692b79 2801 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 2802 'ext': 'mp4',
deaec5af 2803 'title': compat_str,
a0566bbf 2804 'uploader': 'Sky News',
2805 'uploader_id': 'skynews',
2806 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 2807 'upload_date': r're:\d{8}',
2808 'description': compat_str,
a0566bbf 2809 'categories': ['News & Politics'],
2810 'tags': list,
2811 'like_count': int,
2812 'dislike_count': int,
2813 },
2814 'params': {
2815 'skip_download': True,
2816 },
da692b79 2817 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 2818 }, {
2819 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2820 'info_dict': {
2821 'id': 'a48o2S1cPoo',
2822 'ext': 'mp4',
2823 'title': 'The Young Turks - Live Main Show',
2824 'uploader': 'The Young Turks',
2825 'uploader_id': 'TheYoungTurks',
2826 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2827 'upload_date': '20150715',
2828 'license': 'Standard YouTube License',
2829 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2830 'categories': ['News & Politics'],
2831 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2832 'like_count': int,
2833 'dislike_count': int,
2834 },
2835 'params': {
2836 'skip_download': True,
2837 },
2838 'only_matching': True,
2839 }, {
2840 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2841 'only_matching': True,
2842 }, {
2843 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2844 'only_matching': True,
3d3dddc9 2845 }, {
2846 'url': 'https://www.youtube.com/feed/trending',
2847 'only_matching': True,
2848 }, {
3d3dddc9 2849 'url': 'https://www.youtube.com/feed/library',
2850 'only_matching': True,
2851 }, {
3d3dddc9 2852 'url': 'https://www.youtube.com/feed/history',
2853 'only_matching': True,
2854 }, {
3d3dddc9 2855 'url': 'https://www.youtube.com/feed/subscriptions',
2856 'only_matching': True,
2857 }, {
3d3dddc9 2858 'url': 'https://www.youtube.com/feed/watch_later',
2859 'only_matching': True,
2860 }, {
da692b79 2861 'note': 'Recommended - redirects to home page',
3d3dddc9 2862 'url': 'https://www.youtube.com/feed/recommended',
2863 'only_matching': True,
29f7c58a 2864 }, {
da692b79 2865 'note': 'inline playlist with not always working continuations',
29f7c58a 2866 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2867 'only_matching': True,
2868 }, {
2869 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2870 'only_matching': True,
2871 }, {
2872 'url': 'https://www.youtube.com/course',
2873 'only_matching': True,
2874 }, {
2875 'url': 'https://www.youtube.com/zsecurity',
2876 'only_matching': True,
2877 }, {
2878 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2879 'only_matching': True,
2880 }, {
2881 'url': 'https://www.youtube.com/TheYoungTurks/live',
2882 'only_matching': True,
39ed931e 2883 }, {
2884 'url': 'https://www.youtube.com/hashtag/cctv9',
2885 'info_dict': {
2886 'id': 'cctv9',
2887 'title': '#cctv9',
2888 },
2889 'playlist_mincount': 350,
201c1459 2890 }, {
2891 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2892 'only_matching': True,
9297939e 2893 }, {
da692b79 2894 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 2895 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2896 'only_matching': True
fe03a6cd 2897 }, {
2898 'note': '/browse/ should redirect to /channel/',
2899 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
2900 'only_matching': True
2901 }, {
2902 'note': 'VLPL, should redirect to playlist?list=PL...',
2903 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2904 'info_dict': {
2905 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2906 'uploader': 'NoCopyrightSounds',
2907 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
2908 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
2909 'title': 'NCS Releases',
2910 },
2911 'playlist_mincount': 166,
18db7548 2912 }, {
2913 'note': 'Topic, should redirect to playlist?list=UU...',
2914 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
2915 'info_dict': {
2916 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
2917 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
2918 'title': 'Uploads from Royalty Free Music - Topic',
2919 'uploader': 'Royalty Free Music - Topic',
2920 },
2921 'expected_warnings': [
2922 'A channel/user page was given',
2923 'The URL does not have a videos tab',
2924 ],
2925 'playlist_mincount': 101,
2926 }, {
2927 'note': 'Topic without a UU playlist',
2928 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
2929 'info_dict': {
2930 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
2931 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
2932 },
2933 'expected_warnings': [
2934 'A channel/user page was given',
2935 'The URL does not have a videos tab',
2936 'Falling back to channel URL',
2937 ],
2938 'playlist_mincount': 9,
abcdd12b 2939 }, {
2940 'note': 'Youtube music Album',
2941 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
2942 'info_dict': {
2943 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
2944 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
2945 },
2946 'playlist_count': 50,
29f7c58a 2947 }]
2948
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    URLs that YoutubeIE accepts are always rejected here so that single
    videos are handled by the video extractor; for everything else the
    decision is delegated to the parent class.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2953
2954 def _extract_channel_id(self, webpage):
2955 channel_id = self._html_search_meta(
2956 'channelId', webpage, 'channel id', default=None)
2957 if channel_id:
2958 return channel_id
2959 channel_url = self._html_search_meta(
2960 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2961 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2962 'twitter:app:url:googleplay'), webpage, 'channel url')
2963 return self._search_regex(
2964 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2965 channel_url, 'channel id')
15f6397c 2966
8bdd16b4 2967 @staticmethod
cd7c66cf 2968 def _extract_basic_item_renderer(item):
2969 # Modified from _extract_grid_item_renderer
201c1459 2970 known_basic_renderers = (
2971 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 2972 )
2973 for key, renderer in item.items():
201c1459 2974 if not isinstance(renderer, dict):
cd7c66cf 2975 continue
201c1459 2976 elif key in known_basic_renderers:
2977 return renderer
2978 elif key.startswith('grid') and key.endswith('Renderer'):
2979 return renderer
8bdd16b4 2980
def _grid_entries(self, grid_renderer):
    """Yield one entry per usable item of a grid renderer.

    Each item is resolved to its renderer and then classified, in order of
    precedence, as a playlist (``playlistId``), a video (``videoId``), a
    channel (``channelId``) or, failing all of those, by the URL of its
    navigation endpoint. Items that are not dicts, or that carry no known
    renderer, are skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue

        title = try_get(
            renderer, (lambda x: x['title']['runs'][0]['text'],
                       lambda x: x['title']['simpleText']), compat_str)
        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')

        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            yield self._extract_video(renderer)
        elif channel_id:
            # Channel entries only take the plain-text form of the title
            title = try_get(
                renderer, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # Generic fallback: hand the endpoint URL to the first
            # extractor that claims it
            endpoint_url = urljoin('https://www.youtube.com/', try_get(
                renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if not endpoint_url:
                continue
            target_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE)
                 if ie.suitable(endpoint_url)),
                None)
            if target_ie is not None:
                yield self.url_result(
                    endpoint_url, ie=target_ie.ie_key(),
                    video_id=target_ie._match_id(endpoint_url),
                    video_title=title)
8bdd16b4 3023
3d3dddc9 3024 def _shelf_entries_from_content(self, shelf_renderer):
3025 content = shelf_renderer.get('content')
3026 if not isinstance(content, dict):
8bdd16b4 3027 return
cd7c66cf 3028 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3029 if renderer:
3030 # TODO: add support for nested playlists so each shelf is processed
3031 # as separate playlist
3032 # TODO: this includes only first N items
3033 for entry in self._grid_entries(renderer):
3034 yield entry
3035 renderer = content.get('horizontalListRenderer')
3036 if renderer:
3037 # TODO
3038 pass
8bdd16b4 3039
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf: its own URL first, then its contents.

    When the shelf has an endpoint URL, a URL result for it is yielded
    (titled with the shelf's first title run). With ``skip_channels`` set,
    a shelf whose URL contains ``/channels?`` produces nothing at all.
    Entries found in the shelf content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType
        # against WEB_PAGE_TYPE_CHANNEL will not work here.
        points_to_channels = '/channels?' in shelf_url
        if skip_channels and points_to_channels:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may not carry a URL of its own, so its content is always
    # scanned as well
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 3057
8bdd16b4 3058 def _playlist_entries(self, video_list_renderer):
3059 for content in video_list_renderer['contents']:
3060 if not isinstance(content, dict):
3061 continue
3062 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3063 if not isinstance(renderer, dict):
3064 continue
3065 video_id = renderer.get('videoId')
3066 if not video_id:
3067 continue
3068 yield self._extract_video(renderer)
07aeced6 3069
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in a rich grid item, if it has a video ID.

    Reads ``content.videoRenderer`` from the given renderer and yields at
    most one extracted video.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3077
8bdd16b4 3078 def _video_entry(self, video_renderer):
3079 video_id = video_renderer.get('videoId')
3080 if video_id:
3081 return self._extract_video(video_renderer)
dacb3a86 3082
def _post_thread_entries(self, post_thread_renderer):
    """Yield everything playable that a community post refers to.

    In order: the attached video (if any), the attached playlist (if any),
    and every YouTube video linked from the post text, except a link that
    points at the already-attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # Attached video
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # Attached playlist
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'],
        compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # Videos linked inline from the post text
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'],
            compat_str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        linked_video_id = YoutubeIE._match_id(link_url)
        # Do not repeat the video that was already yielded as attachment
        if linked_video_id != attached_video_id:
            yield self.url_result(
                link_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 3118
8bdd16b4 3119 def _post_thread_continuation_entries(self, post_thread_continuation):
3120 contents = post_thread_continuation.get('contents')
3121 if not isinstance(contents, list):
3122 return
3123 for content in contents:
3124 renderer = content.get('backstagePostThreadRenderer')
3125 if not isinstance(renderer, dict):
3126 continue
3127 for entry in self._post_thread_entries(renderer):
3128 yield entry
07aeced6 3129
39ed931e 3130 r''' # unused
3131 def _rich_grid_entries(self, contents):
3132 for content in contents:
3133 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3134 if video_renderer:
3135 entry = self._video_entry(video_renderer)
3136 if entry:
3137 yield entry
3138 '''
3139
29f7c58a 3140 @staticmethod
3141 def _build_continuation_query(continuation, ctp=None):
3142 query = {
3143 'ctoken': continuation,
3144 'continuation': continuation,
3145 }
3146 if ctp:
3147 query['itct'] = ctp
3148 return query
3149
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from `nextContinuationData`.

    Reads renderer['continuations'][0]['nextContinuationData']; returns None
    when that dict or its 'continuation' token is missing.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = data.get('continuation') if data else None
    if token:
        return YoutubeTabIE._build_continuation_query(
            token, data.get('clickTrackingParams'))
    return None
c5e8d7af 3161
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for `renderer`, or None if there is none.

    `nextContinuationData` is tried first; otherwise the renderer's
    'contents' and 'items' lists are scanned for the first
    `continuationItemRenderer` that carries a continuation token.
    """
    next_query = cls._extract_next_continuation_data(renderer)
    if next_query:
        return next_query

    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if endpoint:
            token = try_get(
                endpoint, lambda x: x['continuationCommand']['token'], compat_str)
            if token:
                return YoutubeTabIE._build_continuation_query(
                    token, endpoint.get('clickTrackingParams'))
448830ce 3184
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield all entries of a tab, following continuations page by page.

    @param tab             selected tab renderer (dict); its 'content' is read
    @param item_id         id used in the per-page download notes
    @param identity_token  passed through to _generate_api_headers
    @param account_syncid  passed through to _generate_api_headers
    @param ytcfg           passed to _extract_context, _generate_api_headers
                           and _extract_response
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Yields the entries found below parent_renderer['contents'] and
        # records the continuation it discovers in continuation_list[0].
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # _build_continuation_query only sets 'itct' when click tracking
        # params were available, so the key may legitimately be absent.
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep using the previous visitor data when the response has none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset so that only a continuation found by extract_entries
            # during this page takes precedence below.
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Maps the key of the first continuation item to
        # (handler, name of the list key that handler reads).
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the raw item list so that the handler can read it like a renderer
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3304
@staticmethod
def _extract_selected_tab(tabs):
    """Return the first tab renderer whose 'selected' flag is exactly True.

    Each item is looked up under 'tabRenderer' or 'expandableTabRenderer'.
    Raises ExtractorError when no tab is selected.
    """
    for candidate in tabs:
        tab_renderer = dict_get(candidate, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3313
@staticmethod
def _extract_uploader(data):
    """Extract uploader name, id and URL from the playlist sidebar.

    Returns a dict with any of 'uploader', 'uploader_id' and 'uploader_url';
    keys whose value is None are left out.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        info.update({
            'uploader': owner.get('text'),
            'uploader_id': try_get(
                owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
            'uploader_url': urljoin(
                'https://www.youtube.com/',
                try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
        })
    return {key: value for key, value in info.items() if value is not None}
8bdd16b4 3336
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a page that is rendered as tabs.

    @param item_id  id matched from the URL; used as playlist id fallback
    @param webpage  page HTML, passed to the identity token / ytcfg helpers
    @param data     initial data dict of the page
    @param tabs     list of tab renderers; the selected one is extracted
    @returns        result of self.playlist_result() with lazily generated entries
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    thumbnails_list = tags = []

    selected_tab = self._extract_selected_tab(tabs)
    # Channel metadata is preferred; playlist metadata is only the fallback
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # channel_id is still None here when the playlist renderer was used
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only thumbnails that carry a usable URL
    thumbnails = []
    for t in thumbnails_list:
        if not isinstance(t, dict):
            continue
        thumbnail_url = url_or_none(t.get('url'))
        if not thumbnail_url:
            continue
        thumbnails.append({
            'url': thumbnail_url,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        })
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        # Hashtag header text is tried before falling back to the id
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    # Without channel metadata, take the uploader from the playlist sidebar
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly updated) uploader fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id,
            self._extract_identity_token(webpage, item_id),
            self._extract_account_syncid(data),
            self._extract_ytcfg(item_id, webpage)),
        **metadata)
73c4ac2c 3409
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages as needed.

    @param playlist     playlist renderer (dict) of the first page
    @param playlist_id  id of the mix; sent as 'playlistId' in every request
    @param data         initial data, used to extract the account sync id
    @param webpage      page HTML, used to extract ytcfg and identity token

    Iteration stops when a page has no videos, has nothing after the last
    seen video, shows the very first video again, or has no playlist at all.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        # The previous response may not have contained a playlist renderer
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video that was already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The watch endpoint of the last item is optional; use the fallbacks
        # in the query below when it is missing instead of failing on `.get`.
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 3448
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a page that exposes a watch-page playlist renderer.

    When the playlist endpoint resolves to a URL different from `url`, the
    extraction is delegated to that tab-based playlist URL; otherwise the
    playlist is extracted as a mix.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3466
@staticmethod
def _extract_alerts(data):
    """Yield one (alert_type, message) pair per alert found in data['alerts'].

    The message is the alert's 'simpleText' when present, otherwise the
    concatenated text of its 'runs'. Alerts without a type or without any
    message text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Remote data: ignore anything that is not an alert renderer dict
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if not message:
                runs = try_get(alert, lambda x: x['text']['runs'], list) or []
                # A run without usable text contributes nothing
                message = ''.join(
                    try_get(run, lambda x: x['text'], compat_str) or ''
                    for run in runs)
            # Yield once per alert so that it is not reported twice
            if message:
                yield alert_type, message
3483
3484 def _report_alerts(self, alerts, expected=True):
3ffc7c89 3485 errors = []
3486 warnings = []
95c01b6c 3487 for alert_type, alert_message in alerts:
f3eaa8dd 3488 if alert_type.lower() == 'error':
3ffc7c89 3489 errors.append([alert_type, alert_message])
f3eaa8dd 3490 else:
3ffc7c89 3491 warnings.append([alert_type, alert_message])
f3eaa8dd 3492
3ffc7c89 3493 for alert_type, alert_message in (warnings + errors[:-1]):
6a39ee13 3494 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
3ffc7c89 3495 if errors:
3496 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3497
95c01b6c 3498 def _extract_and_report_alerts(self, data, *args, **kwargs):
3499 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
3500
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when the page has no playlist sidebar items; otherwise
    returns the (non-fatal) API response.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return

    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        menu_items = try_get(
            sidebar_item.get('playlistSidebarPrimaryInfoRenderer'),
            lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if not isinstance(menu_item, dict):
                continue
            nav_item = menu_item.get('menuNavigationItemRenderer')
            label = try_get(
                nav_item, lambda x: x['text']['simpleText'], compat_str)
            if not label or label.lower() != 'show unavailable videos':
                continue
            endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id, params = endpoint.get('browseId'), endpoint.get('params')
            # Only leaves the menu loop; remaining sidebar items are still scanned
            break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=visitor_data)
    # Defaults are used when the button was not found
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3544
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint `ep`, retrying on transient failures.

    @param item_id         id used for the download note
    @param query           query dict sent to the API
    @param note            download note; ' (retry #N)' is appended on retries
    @param headers         HTTP headers for the request
    @param ytcfg           used to extract the API context and key
    @param check_get_keys  key or keys of which the response must contain at
                           least one truthy value; otherwise the response is
                           treated as incomplete and the request is retried
    @param ep              API endpoint name
    @param fatal           when False, failures are reported as warnings and
                           None is returned instead of raising

    The number of retries is taken from the 'extractor_retries' param (default 3).
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if count < retries:
                    continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as e:
                # An error alert must not escape a best-effort (non-fatal) request
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
3592
def _extract_webpage(self, url, item_id):
    """Download `url` and return (webpage, initial data).

    The download is repeated (up to the 'extractor_retries' param, default 3)
    while the initial data has neither 'contents' nor 'currentVideoEndpoint'.
    Alerts of an incomplete page are reported; ExtractorError is raised when
    the last attempt is still incomplete.
    """
    max_retries = self.get_param('extractor_retries', 3)
    incomplete_msg = 'Incomplete yt initial data recieved'
    attempt = 0
    while attempt <= max_retries:
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if attempt:
            self.report_warning('%s. Retrying ...' % incomplete_msg)
        retry_note = ' (retry #%d)' % attempt if attempt else ''
        webpage = self._download_webpage(
            url, item_id, 'Downloading webpage%s' % retry_note)
        data = self._extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if attempt >= max_retries:
            raise ExtractorError(incomplete_msg)
        attempt += 1
    return webpage, data
3614
9297939e 3615 @staticmethod
3616 def _smuggle_data(entries, data):
3617 for entry in entries:
3618 if data:
3619 entry['url'] = smuggle_url(entry['url'], data)
3620 yield entry
3621
def _real_extract(self, url):
    """Extract `url` and propagate the smuggled data to the resulting entries.

    The 'is_music_url' flag is added to the smuggled data when
    self.is_music_url(url) is true.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
3630
# Splits a URL into <pre> (the part matched by _VALID_URL), an optional <tab>
# path component (only attempted when the `channel_type` group took part in
# the match) and <post> (everything that remains).
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
3632
def __real_extract(self, url, smuggled_data):
    """Normalise `url`, download the page and dispatch on what it contains.

    @param url            tab, playlist or watch URL
    @param smuggled_data  dict of smuggled data; 'is_music_url' is read

    Returns a tab based playlist, a watch-page playlist, or a reference to a
    single video. Raises ExtractorError when the page cannot be recognized.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # groupdict() of _url_re with unmatched groups replaced by ''
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if (mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]
                and 'no-youtube-channel-redirect' not in compat_opts):
            if not mobj['not_channel'] and item_id[:2] == 'UC':
                # Topic channels don't have /videos. Use the equivalent playlist instead
                self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                pl_id = 'UU%s' % item_id[2:]
                pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                try:
                    pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                    for alert_type, alert_message in self._extract_alerts(pl_data):
                        # Compare case-insensitively, exactly like _report_alerts
                        # does, so that an upper-case error type is detected too
                        if alert_type.lower() == 'error':
                            raise ExtractorError('Youtube said: %s' % alert_message)
                    item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                except ExtractorError:
                    self.report_warning('The playlist gave error. Falling back to channel URL')
            else:
                self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    # `data` may have been replaced above, so the tabs are looked up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3744
c5e8d7af 3745
class YoutubePlaylistIE(InfoExtractor):
    # Matches bare playlist ids and playlist URLs and hands them over to
    # YoutubeTabIE as a https://www.youtube.com/playlist URL.
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE handles or that carry a video id."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return not has_video_id and super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE through the canonical playlist URL.

        The original query string is kept when there is one; otherwise only
        the matched playlist id is passed as 'list'. The music flag is
        smuggled into the URL when self.is_music_url(url) is true.
        """
        playlist_id = self._match_id(url)
        from_music = self.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        tab_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            tab_url = smuggle_url(tab_url, {'is_music_url': True})
        return self.url_result(tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 3828
3829
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a playlist id; rewritten to the
    # equivalent watch URL and handed over to YoutubeTabIE.
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the watch URL with video and playlist ids for YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3868
3869
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" shortcut for https://www.youtube.com/user/<name>
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the user's channel URL."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3883
b05654f0 3884
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" style shortcuts for the "LL" (liked videos) playlist
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the liked-videos playlist URL."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
3902
3903
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor for the "ytsearch" keyword.

    Subclasses may set _SEARCH_PARAMS; when truthy it is sent as the
    'params' field of every search request.
    """
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n video entries for query, requesting page after page.

        Iteration stops when n entries were yielded, when a response is empty
        or has no recognizable result list, or when a page carries no
        continuation token.
        """
        payload = {'query': query}
        if self._SEARCH_PARAMS:
            payload['params'] = self._SEARCH_PARAMS

        yielded = 0
        for page_no in itertools.count(1):
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_no), ep='search', query=payload,
                check_get_keys=('contents', 'onResponseReceivedCommands'))
            if not response:
                return

            # The first page and the continuation pages keep their results
            # under different paths; take whichever one is present.
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    # Skip anything that is not a video renderer with an id
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            payload['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3973
c9ae7b95 3974
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE for the "ytsearchdate" keyword."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' field of each search request by the parent class
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3980
c9ae7b95 3981
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Extract the results of a youtube.com/results?... search page URL."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own _VALID_URL unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by the URL's query string.

        The search terms come from 'search_query' (or 'q' as fallback) and
        the optional 'sp' value is stored in _SEARCH_PARAMS for the request.
        """
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # A missing 'sp' yields '', which the search code treats as "no params"
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4007
4008
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    Each subclass has to provide _FEED_NAME; it is used both for IE_NAME
    and for the /feed/ URL that extraction is delegated to.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the feed page URL for _FEED_NAME."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4028
4029
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ":ytwatchlater" keyword to the "WL" playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {'url': ':ytwatchlater', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the fixed "WL" playlist URL."""
        watch_later_playlist = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_playlist, ie=YoutubeTabIE.ie_key())
3462ffa8 4042
4043
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'recommended'; also matches the bare youtube.com URL."""
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]
1ed5b5c9 4058
1ed5b5c9 4059
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'subscriptions', reached via the ":ytsubs" keywords."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Accepts :ytsub, :ytsubs, :ytsubscription and :ytsubscriptions
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]
1ed5b5c9 4071
1ed5b5c9 4072
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'history', reached via ":ythis" / ":ythistory"."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
1ed5b5c9
JMF
4081
4082
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their video id and explain the likely cause.

    _VALID_URL matches a watch URL whose query is empty or consists of a
    single non-video parameter, as well as an attribution_link URL with only
    an 'a' parameter. Extraction always fails with an expected error that
    tells the user to quote the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with quoting advice.

        Raises:
            ExtractorError: unconditionally, with expected=True.
        """
        # The message names this project's own executable (yt-dlp); the space
        # before the final period keeps it from reading as part of the id.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4130
4131
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose v= value is only 1 to 10 characters long."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id and URL."""
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)