]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube:comments] Improve comment vote count parsing (fixes #506) (#508)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
a5c56234 8import hashlib
0ca96d48 9import itertools
c5e8d7af 10import json
c4417ddb 11import os.path
d77ab8e2 12import random
c5e8d7af 13import re
8a784c74 14import time
e0df6211 15import traceback
c5e8d7af 16
b05654f0 17from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 18from ..compat import (
edf3e38e 19 compat_chr,
29f7c58a 20 compat_HTTPError,
c5e8d7af 21 compat_parse_qs,
545cc85d 22 compat_str,
7fd002c0 23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
4bb4a188 27)
545cc85d 28from ..jsinterp import JSInterpreter
4bb4a188 29from ..utils import (
c224251a 30 bool_or_none,
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
26fe8ffe 33 dict_get,
d92f5d5a 34 datetime_from_str,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
b60419c5 37 format_field,
2d30521a 38 float_or_none,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
94278f72 41 mimetype2ext,
6310acf5 42 parse_codecs,
49bd8c66 43 parse_count,
7c80519c 44 parse_duration,
dca3ff4a 45 qualities,
3995d37d 46 remove_start,
cf7e015f 47 smuggle_url,
dbdaaa23 48 str_or_none,
c93d53f5 49 str_to_int,
556dbe7f 50 try_get,
c5e8d7af
PH
51 unescapeHTML,
52 unified_strdate,
cf7e015f 53 unsmuggle_url,
8bdd16b4 54 update_url_query,
21c340b8 55 url_or_none,
6e6bc8da 56 urlencode_postdata,
d92f5d5a 57 urljoin
c5e8d7af
PH
58)
59
5f6a1245 60
def parse_qs(url):
    """Parse the query string of *url* and return it as a dict of value lists."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
63
64
de7f3446 65class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
66 """Provide base functions for Youtube extractors"""
    # Google account endpoints for the username/password login flow
    # (which is currently broken, see _login)
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled in with str.format()
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation (no enclosing group) of path names.
    # NOTE(review): presumably the first path components that must not be
    # mistaken for channel names - confirm against the users of this constant
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Regex for a playlist ID: a known prefix followed by 10 or more ID
    # characters, or one of the fixed IDs RDMM, WL, LL and LM
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 84
b2e8bc1b 85 def _login(self):
83317f69 86 """
87 Attempt to log in to YouTube.
88 True is returned if successful or skipped.
89 False is returned if login failed.
90
91 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
92 """
9d5d4d64 93
94 def warn(message):
95 self.report_warning(message)
96
97 # username+password login is broken
98 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
99 self.raise_login_required(
100 'Login details are needed to download this content', method='cookies')
68217024 101 username, password = self._get_login_info()
9d5d4d64 102 if username:
103 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
104 return
9d5d4d64 105
2d6659b9 106 # Everything below this is broken!
107 r'''
b2e8bc1b
JMF
108 # No authentication to be performed
109 if username is None:
a06916d9 110 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 111 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 112 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 113 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 114 return True
b2e8bc1b 115
7cc3570e
PH
116 login_page = self._download_webpage(
117 self._LOGIN_URL, None,
69ea8ca4
PH
118 note='Downloading login page',
119 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
120 if login_page is False:
121 return
b2e8bc1b 122
1212e997 123 login_form = self._hidden_inputs(login_page)
c5e8d7af 124
e00eb564
S
125 def req(url, f_req, note, errnote):
126 data = login_form.copy()
127 data.update({
128 'pstMsg': 1,
129 'checkConnection': 'youtube',
130 'checkedDomains': 'youtube',
131 'hl': 'en',
132 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 133 'f.req': json.dumps(f_req),
e00eb564
S
134 'flowName': 'GlifWebSignIn',
135 'flowEntry': 'ServiceLogin',
baf67a60
S
136 # TODO: reverse actual botguard identifier generation algo
137 'bgRequest': '["identifier",""]',
041bc3ad 138 })
e00eb564
S
139 return self._download_json(
140 url, None, note=note, errnote=errnote,
141 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
142 fatal=False,
143 data=urlencode_postdata(data), headers={
144 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
145 'Google-Accounts-XSRF': 1,
146 })
147
3995d37d
S
148 lookup_req = [
149 username,
150 None, [], None, 'US', None, None, 2, False, True,
151 [
152 None, None,
153 [2, 1, None, 1,
154 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
155 None, [], 4],
156 1, [None, None, []], None, None, None, True
157 ],
158 username,
159 ]
160
e00eb564 161 lookup_results = req(
3995d37d 162 self._LOOKUP_URL, lookup_req,
e00eb564
S
163 'Looking up account info', 'Unable to look up account info')
164
165 if lookup_results is False:
166 return False
041bc3ad 167
3995d37d
S
168 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
169 if not user_hash:
170 warn('Unable to extract user hash')
171 return False
172
173 challenge_req = [
174 user_hash,
175 None, 1, None, [1, None, None, None, [password, None, True]],
176 [
177 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
178 1, [None, None, []], None, None, None, True
179 ]]
83317f69 180
3995d37d
S
181 challenge_results = req(
182 self._CHALLENGE_URL, challenge_req,
183 'Logging in', 'Unable to log in')
83317f69 184
3995d37d 185 if challenge_results is False:
e00eb564 186 return
83317f69 187
3995d37d
S
188 login_res = try_get(challenge_results, lambda x: x[0][5], list)
189 if login_res:
190 login_msg = try_get(login_res, lambda x: x[5], compat_str)
191 warn(
192 'Unable to login: %s' % 'Invalid password'
193 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
194 return False
195
196 res = try_get(challenge_results, lambda x: x[0][-1], list)
197 if not res:
198 warn('Unable to extract result entry')
199 return False
200
9a6628aa
S
201 login_challenge = try_get(res, lambda x: x[0][0], list)
202 if login_challenge:
203 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
204 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
205 # SEND_SUCCESS - TFA code has been successfully sent to phone
206 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 207 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
208 if status == 'QUOTA_EXCEEDED':
209 warn('Exceeded the limit of TFA codes, try later')
210 return False
211
212 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
213 if not tl:
214 warn('Unable to extract TL')
215 return False
216
217 tfa_code = self._get_tfa_info('2-step verification code')
218
219 if not tfa_code:
220 warn(
221 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
222 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
223 return False
224
225 tfa_code = remove_start(tfa_code, 'G-')
226
227 tfa_req = [
228 user_hash, None, 2, None,
229 [
230 9, None, None, None, None, None, None, None,
231 [None, tfa_code, True, 2]
232 ]]
233
234 tfa_results = req(
235 self._TFA_URL.format(tl), tfa_req,
236 'Submitting TFA code', 'Unable to submit TFA code')
237
238 if tfa_results is False:
239 return False
240
241 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
242 if tfa_res:
243 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
244 warn(
245 'Unable to finish TFA: %s' % 'Invalid TFA code'
246 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
247 return False
248
249 check_cookie_url = try_get(
250 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
251 else:
252 CHALLENGES = {
253 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
254 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
255 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
256 }
257 challenge = CHALLENGES.get(
258 challenge_str,
259 '%s returned error %s.' % (self.IE_NAME, challenge_str))
260 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
261 return False
3995d37d
S
262 else:
263 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
264
265 if not check_cookie_url:
266 warn('Unable to extract CheckCookie URL')
267 return False
e00eb564
S
268
269 check_cookie_results = self._download_webpage(
3995d37d
S
270 check_cookie_url, None, 'Checking cookie', fatal=False)
271
272 if check_cookie_results is False:
273 return False
e00eb564 274
3995d37d
S
275 if 'https://myaccount.google.com/' not in check_cookie_results:
276 warn('Unable to log in')
b2e8bc1b 277 return False
e00eb564 278
b2e8bc1b 279 return True
2d6659b9 280 '''
b2e8bc1b 281
cce889b9 282 def _initialize_consent(self):
283 cookies = self._get_cookies('https://www.youtube.com/')
284 if cookies.get('__Secure-3PSID'):
285 return
286 consent_id = None
287 consent = cookies.get('CONSENT')
288 if consent:
289 if 'YES' in consent.value:
290 return
291 consent_id = self._search_regex(
292 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
293 if not consent_id:
294 consent_id = random.randint(100, 999)
295 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 296
b2e8bc1b 297 def _real_initialize(self):
cce889b9 298 self._initialize_consent()
b2e8bc1b
JMF
299 if self._downloader is None:
300 return
b2e8bc1b
JMF
301 if not self._login():
302 return
c5e8d7af 303
    # Matches the `ytInitialData = {...};` assignment, also in its
    # `window["ytInitialData"]` form; group 1 is the (non-greedily matched) object
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Matches the `ytInitialPlayerResponse = {...};` assignment; group 1 is the object
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Text that may follow the object; _extract_yt_initial_data appends it to
    # _YT_INITIAL_DATA_RE so that the non-greedy match ends at this boundary
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 307
    # Built-in ytcfg values per Innertube client. They are used as fallback
    # whenever a value cannot be read from a supplied ytcfg (see _ytcfg_get_safe)
    # and are handed out as deep copies by _get_default_ytcfg.
    # NOTE(review): the WEB* entries use integers for INNERTUBE_CONTEXT_CLIENT_NAME
    # while the ANDROID* entries use the client name string. The value is sent as
    # the X-YouTube-Client-Name header (see _generate_api_headers) - confirm
    # whether numeric IDs are expected for the ANDROID* clients as well
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID'
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER'
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_MUSIC'
        }
    }
394
    # Hostname used for Innertube API requests, keyed by client name.
    # _get_innertube_host looks up the requested client first and then 'WEB',
    # so clients without an entry here use the 'WEB' host
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }
401
402 def _get_default_ytcfg(self, client='WEB'):
403 if client in self._YT_DEFAULT_YTCFGS:
404 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
405 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
406 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
407
408 def _get_innertube_host(self, client='WEB'):
409 return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
410
411 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
412 # try_get but with fallback to default ytcfg client values when present
413 _func = lambda y: try_get(y, getter, expected_type)
414 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
415
416 def _extract_client_name(self, ytcfg, default_client='WEB'):
417 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
418
419 def _extract_client_version(self, ytcfg, default_client='WEB'):
420 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
421
422 def _extract_api_key(self, ytcfg=None, default_client='WEB'):
423 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
424
425 def _extract_context(self, ytcfg=None, default_client='WEB'):
426 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
427 context = _get_context(ytcfg)
428 if context:
429 return context
430
431 context = _get_context(self._get_default_ytcfg(default_client))
432 if not ytcfg:
433 return context
434
435 # Recreate the client context (required)
436 context['client'].update({
437 'clientVersion': self._extract_client_version(ytcfg, default_client),
438 'clientName': self._extract_client_name(ytcfg, default_client),
439 })
440 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
441 if visitor_data:
442 context['client']['visitorData'] = visitor_data
443 return context
444
445 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
1974e99f 446 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
447 # See: https://github.com/yt-dlp/yt-dlp/issues/393
448 yt_cookies = self._get_cookies('https://www.youtube.com')
449 sapisid_cookie = dict_get(
450 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
a5c56234
M
451 if sapisid_cookie is None:
452 return
453 time_now = round(time.time())
1974e99f 454 # SAPISID cookie is required if not already present
455 if not yt_cookies.get('SAPISID'):
456 self._set_cookie(
457 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
458 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
459 sapisidhash = hashlib.sha1(
109dd3b2 460 f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
1974e99f 461 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
462
463 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 464 note='Downloading API JSON', errnote='Unable to download API page',
109dd3b2 465 context=None, api_key=None, api_hostname=None, default_client='WEB'):
f4f751af 466
109dd3b2 467 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 468 data.update(query)
109dd3b2 469 real_headers = self._generate_api_headers(client=default_client)
f4f751af 470 real_headers.update({'content-type': 'application/json'})
471 if headers:
472 real_headers.update(headers)
545cc85d 473 return self._download_json(
109dd3b2 474 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 475 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 476 data=json.dumps(data).encode('utf8'), headers=real_headers,
477 query={'key': api_key or self._extract_api_key()})
478
8bdd16b4 479 def _extract_yt_initial_data(self, video_id, webpage):
480 return self._parse_json(
481 self._search_regex(
29f7c58a 482 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 483 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 484 video_id)
0c148415 485
a1c5d2ca
M
486 def _extract_identity_token(self, webpage, item_id):
487 ytcfg = self._extract_ytcfg(item_id, webpage)
488 if ytcfg:
489 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
490 if token:
491 return token
492 return self._search_regex(
493 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
494 'identity token', default=None)
495
496 @staticmethod
497 def _extract_account_syncid(data):
8ea3f7b9 498 """
499 Extract syncId required to download private playlists of secondary channels
500 @param data Either response or ytcfg
501 """
502 sync_ids = (try_get(
503 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
504 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
a1c5d2ca
M
505 if len(sync_ids) >= 2 and sync_ids[1]:
506 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
507 # and just "user_syncid||" for primary channel. We only want the channel_syncid
508 return sync_ids[0]
8ea3f7b9 509 # ytcfg includes channel_syncid if on secondary channel
510 return data.get('DELEGATED_SESSION_ID')
a1c5d2ca 511
29f7c58a 512 def _extract_ytcfg(self, video_id, webpage):
8c54a305 513 if not webpage:
514 return {}
29f7c58a 515 return self._parse_json(
516 self._search_regex(
517 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 518 default='{}'), video_id, fatal=False) or {}
519
109dd3b2 520 def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
521 visitor_data=None, api_hostname=None, client='WEB'):
522 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(client))
f4f751af 523 headers = {
109dd3b2 524 'X-YouTube-Client-Name': compat_str(
525 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)),
526 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
527 'Origin': origin
f4f751af 528 }
2d6659b9 529 if not visitor_data and ytcfg:
530 visitor_data = try_get(
531 self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)
f4f751af 532 if identity_token:
109dd3b2 533 headers['X-Youtube-Identity-Token'] = identity_token
f4f751af 534 if account_syncid:
535 headers['X-Goog-PageId'] = account_syncid
536 headers['X-Goog-AuthUser'] = 0
537 if visitor_data:
109dd3b2 538 headers['X-Goog-Visitor-Id'] = visitor_data
539 auth = self._generate_sapisidhash_header(origin)
f4f751af 540 if auth is not None:
541 headers['Authorization'] = auth
109dd3b2 542 headers['X-Origin'] = origin
f4f751af 543 return headers
29f7c58a 544
2d6659b9 545 @staticmethod
546 def _build_api_continuation_query(continuation, ctp=None):
547 query = {
548 'continuation': continuation
549 }
550 # TODO: Inconsistency with clickTrackingParams.
551 # Currently we have a fixed ctp contained within context (from ytcfg)
552 # and a ctp in root query for continuation.
553 if ctp:
554 query['clickTracking'] = {'clickTrackingParams': ctp}
555 return query
556
557 @classmethod
558 def _continuation_query_ajax_to_api(cls, continuation_query):
559 continuation = dict_get(continuation_query, ('continuation', 'ctoken'))
560 return cls._build_api_continuation_query(continuation, continuation_query.get('itct'))
561
562 @staticmethod
563 def _build_continuation_query(continuation, ctp=None):
564 query = {
565 'ctoken': continuation,
566 'continuation': continuation,
567 }
568 if ctp:
569 query['itct'] = ctp
570 return query
571
572 @classmethod
573 def _extract_next_continuation_data(cls, renderer):
574 next_continuation = try_get(
575 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
576 lambda x: x['continuation']['reloadContinuationData']), dict)
577 if not next_continuation:
578 return
579 continuation = next_continuation.get('continuation')
580 if not continuation:
581 return
582 ctp = next_continuation.get('clickTrackingParams')
583 return cls._build_continuation_query(continuation, ctp)
584
585 @classmethod
586 def _extract_continuation_ep_data(cls, continuation_ep: dict):
587 if isinstance(continuation_ep, dict):
588 continuation = try_get(
589 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
590 if not continuation:
591 return
592 ctp = continuation_ep.get('clickTrackingParams')
593 return cls._build_continuation_query(continuation, ctp)
594
595 @classmethod
596 def _extract_continuation(cls, renderer):
597 next_continuation = cls._extract_next_continuation_data(renderer)
598 if next_continuation:
599 return next_continuation
600 contents = []
601 for key in ('contents', 'items'):
602 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
603 for content in contents:
604 if not isinstance(content, dict):
605 continue
606 continuation_ep = try_get(
607 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
608 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
609 dict)
610 continuation = cls._extract_continuation_ep_data(continuation_ep)
611 if continuation:
612 return continuation
613
109dd3b2 614 @staticmethod
615 def _extract_alerts(data):
616 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
617 if not isinstance(alert_dict, dict):
618 continue
619 for alert in alert_dict.values():
620 alert_type = alert.get('type')
621 if not alert_type:
622 continue
623 message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
624 if message:
625 yield alert_type, message
626 for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
627 message += try_get(run, lambda x: x['text'], compat_str)
628 if message:
629 yield alert_type, message
630
631 def _report_alerts(self, alerts, expected=True):
632 errors = []
633 warnings = []
634 for alert_type, alert_message in alerts:
635 if alert_type.lower() == 'error':
636 errors.append([alert_type, alert_message])
637 else:
638 warnings.append([alert_type, alert_message])
639
640 for alert_type, alert_message in (warnings + errors[:-1]):
641 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
642 if errors:
643 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
644
645 def _extract_and_report_alerts(self, data, *args, **kwargs):
646 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
647
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """
        Call the API endpoint ep via _call_api(), retrying on transient failures.

        A request is retried up to 'extractor_retries' (default 3) times when
        it fails with HTTP 500, 503 or 404, or when check_get_keys is given
        and dict_get() finds none of these keys in the response.

        @param check_get_keys  Keys of which at least one must be found in the
                               response; no check is done if empty or None
        @param fatal           Whether to raise on failure. If False, a
                               warning is reported and None is returned
        Context and API key are taken from ytcfg with fallback to the defaults
        of default_client.
        Alerts contained in a response are reported via
        _extract_and_report_alerts(); an error alert is a failure.
        """
        response = None
        last_error = None
        # Starts at -1 so that the first attempt has count == 0
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                # Always fatal here so that failures arrive as ExtractorError below
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                # Not retryable, or out of retries
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
703
9297939e 704 @staticmethod
705 def is_music_url(url):
706 return re.match(r'https?://music\.youtube\.com/', url) is not None
707
30a074c2 708 def _extract_video(self, renderer):
709 video_id = renderer.get('videoId')
710 title = try_get(
711 renderer,
712 (lambda x: x['title']['runs'][0]['text'],
713 lambda x: x['title']['simpleText']), compat_str)
714 description = try_get(
715 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
716 compat_str)
717 duration = parse_duration(try_get(
718 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
719 view_count_text = try_get(
720 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
721 view_count = str_to_int(self._search_regex(
722 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
723 'view count', default=None))
724 uploader = try_get(
bc2ca1bb 725 renderer,
726 (lambda x: x['ownerText']['runs'][0]['text'],
727 lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
30a074c2 728 return {
39ed931e 729 '_type': 'url',
30a074c2 730 'ie_key': YoutubeIE.ie_key(),
731 'id': video_id,
732 'url': video_id,
733 'title': title,
734 'description': description,
735 'duration': duration,
736 'view_count': view_count,
737 'uploader': uploader,
738 }
739
0c148415 740
360e1ca5 741class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 742 IE_DESC = 'YouTube.com'
bc2ca1bb 743 _INVIDIOUS_SITES = (
744 # invidious-redirect websites
745 r'(?:www\.)?redirect\.invidious\.io',
746 r'(?:(?:www|dev)\.)?invidio\.us',
747 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
748 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 749 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 750 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 751 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 752 # youtube-dl invidious instances list
753 r'(?:(?:www|no)\.)?invidiou\.sh',
754 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
755 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 756 r'(?:www\.)?invidious\.mastodon\.host',
757 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 758 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 759 r'(?:www\.)?invidious\.tinfoil-hat\.net',
760 r'(?:www\.)?invidious\.himiko\.cloud',
761 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 762 r'(?:www\.)?invidious\.tube',
763 r'(?:www\.)?invidiou\.site',
764 r'(?:www\.)?invidious\.site',
765 r'(?:www\.)?invidious\.xyz',
766 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 767 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 768 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 769 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 770 r'(?:www\.)?tube\.poal\.co',
771 r'(?:www\.)?tube\.connect\.cafe',
772 r'(?:www\.)?vid\.wxzm\.sx',
773 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 774 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 775 r'(?:www\.)?yewtu\.be',
776 r'(?:www\.)?yt\.elukerio\.org',
777 r'(?:www\.)?yt\.lelux\.fi',
778 r'(?:www\.)?invidious\.ggc-project\.de',
779 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 780 r'(?:www\.)?ytprivate\.com',
781 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 782 r'(?:www\.)?invidious\.toot\.koeln',
783 r'(?:www\.)?invidious\.fdn\.fr',
784 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 785 r'(?:www\.)?invidious\.namazso\.eu',
786 r'(?:www\.)?invidious\.silkky\.cloud',
787 r'(?:www\.)?invidious\.exonip\.de',
788 r'(?:www\.)?invidious\.riverside\.rocks',
789 r'(?:www\.)?invidious\.blamefran\.net',
790 r'(?:www\.)?invidious\.moomoo\.de',
791 r'(?:www\.)?ytb\.trom\.tf',
792 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 793 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
794 r'(?:www\.)?qklhadlycap4cnod\.onion',
795 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
796 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
797 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
798 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
799 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
800 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 801 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
802 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
803 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
804 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 805 )
# Verbose-mode ((?x)) regular expression recognising a single YouTube video
# reference and capturing its 11-character video ID in the named group `id`.
#
# Structure of the pattern:
#   * Group 1 (optional) is the whole URL prefix: a scheme (or a bare `//`),
#     then either a youtube-style host followed by a `v/`, `embed/`, `e/` or
#     `...?v=` style path, or a short-link host (youtu.be, vid.plus,
#     zwearz.com/watch, an Invidious host) followed by `/`, or the
#     cleanvideosearch.com `watch?videoId=` form.
#   * Because group 1 is optional, a bare 11-character ID also matches.
#   * `(?(1).+)?` is a conditional: trailing characters after the ID are only
#     allowed when group 1 participated in the match, so a naked ID must be
#     followed directly by `#` or the end of the string.
#
# The pattern is assembled with printf-style `%` formatting: both
# `%(invidious)s` placeholders are replaced with the entries of
# _INVIDIOUS_SITES joined by `|`. Any literal `%` added to the pattern in the
# future would therefore have to be written as `%%`.
# NOTE: the `#` remarks inside the string are regex-level comments (verbose
# mode), i.e. they are part of the string literal, not Python comments.
cb7dfeea 806 _VALID_URL = r"""(?x)^
c5e8d7af 807 (
edb53e2d 808 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 809 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
810 (?:www\.)?deturl\.com/www\.youtube\.com|
811 (?:www\.)?pwnyoutube\.com|
812 (?:www\.)?hooktube\.com|
813 (?:www\.)?yourepeat\.com|
814 tube\.majestyc\.net|
815 %(invidious)s|
816 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
817 (?:.*?\#/)? # handle anchor (#/) redirect urls
818 (?: # the various things that can precede the ID:
ac7553d0 819 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 820 |(?: # or the v= param in all its forms
f7000f3a 821 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 822 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 823 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
824 v=
825 )
f4b05232 826 ))
cbaed4bb
S
827 |(?:
828 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
829 vid\.plus| # or vid.plus/xxxx
830 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 831 %(invidious)s
cbaed4bb 832 )/
edb53e2d 833 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 834 )
c5e8d7af 835 )? # all until now is optional -> you can pass the naked ID
201c1459 836 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 837 (?(1).+)? # if we found the ID, everything can follow
9297939e 838 (?:\#|$)""" % {
bc2ca1bb 839 'invidious': '|'.join(_INVIDIOUS_SITES),
840 }
# Regular expressions that pull a player identifier out of a player script
# URL/path. Every alternative exposes the identifier through the same named
# group `id`:
#   1. `/s/player/<id>/player...`              (id of at least 8 characters)
#   2. `/<id>/player_ias.vflset/.../base.js` or the `player-plasma-ias-*`
#      phone/tablet variant, anchored at the end of the string
#   3. a legacy `vfl...` token somewhere before a trailing `.js`
# NOTE(review): presumably the patterns are tried in the order listed by the
# code that consumes this tuple -- confirm against the caller.
e40c758c 841 _PLAYER_INFO_RE = (
cc2db878 842 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
843 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 844 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 845 )
# Static lookup table of known format metadata, keyed by itag.
#
# Keys are itag identifiers written as *strings* (e.g. '18', '137'), plus the
# special key '_rtmp'. Each value is a dict holding whichever of these fields
# are known for that itag: 'ext', 'width', 'height', 'acodec', 'vcodec',
# 'abr', 'fps', 'container', 'format_note', 'protocol' and 'preference'.
# Fields are simply omitted when they vary per video (see the remarks on
# itags 36, 138 and 272 below).
#
# The 3D entries carry 'preference': -20 and the HLS entries
# 'preference': -10; no other entry sets a preference.
# NOTE(review): presumably these values act as defaults that are merged with
# data reported by the server at extraction time -- confirm against the code
# that reads this table.
2c62dc26 846 _formats = {
c2d3cb4c 847 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
848 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
849 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
850 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
851 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
852 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
853 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
854 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 855 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 856 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
857 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
858 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
859 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
860 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
861 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 862 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 863 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
864 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 865
866
867 # 3D videos
c2d3cb4c 868 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
869 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
870 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
871 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 872 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
873 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
874 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 875
96fb5605 876 # Apple HTTP Live Streaming
11f12195 877 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 878 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
879 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
880 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
881 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
882 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 883 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
884 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
885
886 # DASH mp4 video
d23028a8
S
887 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
888 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
889 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
890 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
891 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 892 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
893 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
894 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
895 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
896 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
897 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
898 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 899
f6f1fc92 900 # Dash mp4 audio
d23028a8
S
901 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
902 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
903 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
904 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
905 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
906 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
907 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
908
909 # Dash webm
d23028a8
S
910 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
911 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
912 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
913 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
914 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
915 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
916 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
917 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
918 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
919 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
920 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
921 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
922 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
923 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
924 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 925 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
926 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
927 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
928 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
929 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
930 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
931 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
932
933 # Dash webm audio
d23028a8
S
934 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
935 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 936
0857baad 937 # Dash webm audio with opus inside
d23028a8
S
938 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
939 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
940 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 941
ce6b9a2d
PH
942 # RTMP (unnamed)
943 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
944
945 # av01 video only formats sometimes served with "unknown" codecs
946 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
947 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
948 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
949 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 950 }
# Names of the subtitle formats this extractor knows about.
# NOTE(review): presumably each name is requested as a separate subtitle
# variant and the tuple order is the order they are offered in -- confirm
# against the subtitle extraction code.
29f7c58a 951 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 952

# Human-readable messages that indicate an age-gated video.
# NOTE(review): presumably compared against the "reason" text returned with
# the playability status -- confirm against the caller before editing the
# wording, since the strings must match the served text.
109dd3b2 953 _AGE_GATE_REASONS = (
954 'Sign in to confirm your age',
955 'This video may be inappropriate for some users.',
956 'Sorry, this content is age-restricted.')
957
fd5c4aab
S
# NOTE(review): looks like this opts the extractor out of the base class's
# generic geo-restriction bypass -- confirm against InfoExtractor.
958 _GEO_BYPASS = False
959
# Extractor name.
78caa52a 960 IE_NAME = 'youtube'
2eb88d95
PH
961 _TESTS = [
962 {
2d3d2997 963 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
964 'info_dict': {
965 'id': 'BaW_jenozKc',
966 'ext': 'mp4',
3867038a 967 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
968 'uploader': 'Philipp Hagemeister',
969 'uploader_id': 'phihag',
ec85ded8 970 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
971 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
972 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 973 'upload_date': '20121002',
3867038a 974 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 975 'categories': ['Science & Technology'],
3867038a 976 'tags': ['youtube-dl'],
556dbe7f 977 'duration': 10,
dbdaaa23 978 'view_count': int,
3e7c1224
PH
979 'like_count': int,
980 'dislike_count': int,
7c80519c 981 'start_time': 1,
297a564b 982 'end_time': 9,
2eb88d95 983 }
0e853ca4 984 },
fccd3771 985 {
4bc3a23e
PH
986 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
987 'note': 'Embed-only video (#1746)',
988 'info_dict': {
989 'id': 'yZIXLfi8CZQ',
990 'ext': 'mp4',
991 'upload_date': '20120608',
992 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
993 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
994 'uploader': 'SET India',
94bfcd23 995 'uploader_id': 'setindia',
ec85ded8 996 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 997 'age_limit': 18,
545cc85d 998 },
999 'skip': 'Private video',
fccd3771 1000 },
11b56058 1001 {
8bdd16b4 1002 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1003 'note': 'Use the first video ID in the URL',
1004 'info_dict': {
1005 'id': 'BaW_jenozKc',
1006 'ext': 'mp4',
3867038a 1007 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1008 'uploader': 'Philipp Hagemeister',
1009 'uploader_id': 'phihag',
ec85ded8 1010 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1011 'upload_date': '20121002',
3867038a 1012 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1013 'categories': ['Science & Technology'],
3867038a 1014 'tags': ['youtube-dl'],
556dbe7f 1015 'duration': 10,
dbdaaa23 1016 'view_count': int,
11b56058
PM
1017 'like_count': int,
1018 'dislike_count': int,
34a7de29
S
1019 },
1020 'params': {
1021 'skip_download': True,
1022 },
11b56058 1023 },
dd27fd17 1024 {
2d3d2997 1025 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1026 'note': '256k DASH audio (format 141) via DASH manifest',
1027 'info_dict': {
1028 'id': 'a9LDPn-MO4I',
1029 'ext': 'm4a',
1030 'upload_date': '20121002',
1031 'uploader_id': '8KVIDEO',
ec85ded8 1032 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1033 'description': '',
1034 'uploader': '8KVIDEO',
1035 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1036 },
4bc3a23e
PH
1037 'params': {
1038 'youtube_include_dash_manifest': True,
1039 'format': '141',
4919603f 1040 },
de3c7fe0 1041 'skip': 'format 141 not served anymore',
dd27fd17 1042 },
8bdd16b4 1043 # DASH manifest with encrypted signature
1044 {
1045 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1046 'info_dict': {
1047 'id': 'IB3lcPjvWLA',
1048 'ext': 'm4a',
1049 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1050 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1051 'duration': 244,
1052 'uploader': 'AfrojackVEVO',
1053 'uploader_id': 'AfrojackVEVO',
1054 'upload_date': '20131011',
cc2db878 1055 'abr': 129.495,
8bdd16b4 1056 },
1057 'params': {
1058 'youtube_include_dash_manifest': True,
1059 'format': '141/bestaudio[ext=m4a]',
1060 },
1061 },
aa79ac0c
PH
1062 # Controversy video
1063 {
1064 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
1065 'info_dict': {
1066 'id': 'T4XJQO3qol8',
1067 'ext': 'mp4',
556dbe7f 1068 'duration': 219,
aa79ac0c 1069 'upload_date': '20100909',
4fe54c12 1070 'uploader': 'Amazing Atheist',
aa79ac0c 1071 'uploader_id': 'TheAmazingAtheist',
ec85ded8 1072 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 1073 'title': 'Burning Everyone\'s Koran',
545cc85d 1074 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 1075 }
c522adb1 1076 },
dd2d55f1 1077 # Normal age-gate video (embed allowed)
c522adb1 1078 {
2d3d2997 1079 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1080 'info_dict': {
1081 'id': 'HtVdAasjOgU',
1082 'ext': 'mp4',
1083 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1084 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1085 'duration': 142,
c522adb1
JMF
1086 'uploader': 'The Witcher',
1087 'uploader_id': 'WitcherGame',
ec85ded8 1088 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1089 'upload_date': '20140605',
34952f09 1090 'age_limit': 18,
c522adb1
JMF
1091 },
1092 },
8bdd16b4 1093 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1094 # YouTube Red ad is not captured for creator
1095 {
1096 'url': '__2ABJjxzNo',
1097 'info_dict': {
1098 'id': '__2ABJjxzNo',
1099 'ext': 'mp4',
1100 'duration': 266,
1101 'upload_date': '20100430',
1102 'uploader_id': 'deadmau5',
1103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1104 'creator': 'deadmau5',
1105 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1106 'uploader': 'deadmau5',
1107 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1108 'alt_title': 'Some Chords',
8bdd16b4 1109 },
1110 'expected_warnings': [
1111 'DASH manifest missing',
1112 ]
1113 },
067aa17e 1114 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1115 {
1116 'url': 'lqQg6PlCWgI',
1117 'info_dict': {
1118 'id': 'lqQg6PlCWgI',
1119 'ext': 'mp4',
556dbe7f 1120 'duration': 6085,
90227264 1121 'upload_date': '20150827',
cbe2bd91 1122 'uploader_id': 'olympic',
ec85ded8 1123 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1124 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 1125 'uploader': 'Olympic',
cbe2bd91
PH
1126 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1127 },
1128 'params': {
1129 'skip_download': 'requires avconv',
e52a40ab 1130 }
cbe2bd91 1131 },
6271f1ca
PH
1132 # Non-square pixels
1133 {
1134 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1135 'info_dict': {
1136 'id': '_b-2C3KPAM0',
1137 'ext': 'mp4',
1138 'stretched_ratio': 16 / 9.,
556dbe7f 1139 'duration': 85,
6271f1ca
PH
1140 'upload_date': '20110310',
1141 'uploader_id': 'AllenMeow',
ec85ded8 1142 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1143 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1144 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1145 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1146 },
06b491eb
S
1147 },
1148 # url_encoded_fmt_stream_map is empty string
1149 {
1150 'url': 'qEJwOuvDf7I',
1151 'info_dict': {
1152 'id': 'qEJwOuvDf7I',
f57b7835 1153 'ext': 'webm',
06b491eb
S
1154 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1155 'description': '',
1156 'upload_date': '20150404',
1157 'uploader_id': 'spbelect',
1158 'uploader': 'Наблюдатели Петербурга',
1159 },
1160 'params': {
1161 'skip_download': 'requires avconv',
e323cf3f
S
1162 },
1163 'skip': 'This live event has ended.',
06b491eb 1164 },
067aa17e 1165 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1166 {
1167 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1168 'info_dict': {
1169 'id': 'FIl7x6_3R5Y',
eb6793ba 1170 'ext': 'webm',
da77d856
S
1171 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1172 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1173 'duration': 220,
da77d856
S
1174 'upload_date': '20150625',
1175 'uploader_id': 'dorappi2000',
ec85ded8 1176 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1177 'uploader': 'dorappi2000',
eb6793ba 1178 'formats': 'mincount:31',
da77d856 1179 },
eb6793ba 1180 'skip': 'not actual anymore',
2ee8f5d8 1181 },
8a1a26ce
YCH
1182 # DASH manifest with segment_list
1183 {
1184 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1185 'md5': '8ce563a1d667b599d21064e982ab9e31',
1186 'info_dict': {
1187 'id': 'CsmdDsKjzN8',
1188 'ext': 'mp4',
17ee98e1 1189 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1190 'uploader': 'Airtek',
1191 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1192 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1193 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1194 },
1195 'params': {
1196 'youtube_include_dash_manifest': True,
1197 'format': '135', # bestvideo
be49068d
S
1198 },
1199 'skip': 'This live event has ended.',
2ee8f5d8 1200 },
cf7e015f
S
1201 {
1202 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1203 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1204 'info_dict': {
545cc85d 1205 'id': 'jvGDaLqkpTg',
1206 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1207 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1208 },
1209 'playlist': [{
1210 'info_dict': {
545cc85d 1211 'id': 'jvGDaLqkpTg',
cf7e015f 1212 'ext': 'mp4',
545cc85d 1213 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1214 'description': 'md5:e03b909557865076822aa169218d6a5d',
1215 'duration': 10643,
1216 'upload_date': '20161111',
1217 'uploader': 'Team PGP',
1218 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1219 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1220 },
1221 }, {
1222 'info_dict': {
545cc85d 1223 'id': '3AKt1R1aDnw',
cf7e015f 1224 'ext': 'mp4',
545cc85d 1225 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1226 'description': 'md5:e03b909557865076822aa169218d6a5d',
1227 'duration': 10991,
1228 'upload_date': '20161111',
1229 'uploader': 'Team PGP',
1230 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1231 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1232 },
1233 }, {
1234 'info_dict': {
545cc85d 1235 'id': 'RtAMM00gpVc',
cf7e015f 1236 'ext': 'mp4',
545cc85d 1237 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1238 'description': 'md5:e03b909557865076822aa169218d6a5d',
1239 'duration': 10995,
1240 'upload_date': '20161111',
1241 'uploader': 'Team PGP',
1242 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1243 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1244 },
1245 }, {
1246 'info_dict': {
545cc85d 1247 'id': '6N2fdlP3C5U',
cf7e015f 1248 'ext': 'mp4',
545cc85d 1249 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1250 'description': 'md5:e03b909557865076822aa169218d6a5d',
1251 'duration': 10990,
1252 'upload_date': '20161111',
1253 'uploader': 'Team PGP',
1254 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1255 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1256 },
1257 }],
1258 'params': {
1259 'skip_download': True,
1260 },
cbaed4bb 1261 },
f9f49d87 1262 {
067aa17e 1263 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1264 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1265 'info_dict': {
1266 'id': 'gVfLd0zydlo',
1267 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1268 },
1269 'playlist_count': 2,
be49068d 1270 'skip': 'Not multifeed anymore',
f9f49d87 1271 },
cbaed4bb 1272 {
2d3d2997 1273 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1274 'only_matching': True,
0e49d9a6 1275 },
6d4fc66b 1276 {
2d3d2997 1277 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1278 'only_matching': True,
1279 },
0e49d9a6 1280 {
067aa17e 1281 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1282 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1283 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1284 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1285 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1286 'info_dict': {
1287 'id': 'lsguqyKfVQg',
1288 'ext': 'mp4',
1289 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 1290 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 1291 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1292 'duration': 133,
0e49d9a6
LL
1293 'upload_date': '20151119',
1294 'uploader_id': 'IronSoulElf',
ec85ded8 1295 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1296 'uploader': 'IronSoulElf',
eb6793ba
S
1297 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1298 'track': 'Dark Walk - Position Music',
1299 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 1300 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1301 },
1302 'params': {
1303 'skip_download': True,
1304 },
1305 },
61f92af1 1306 {
067aa17e 1307 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1308 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1309 'only_matching': True,
1310 },
313dfc45
LL
1311 {
1312 # Video with yt:stretch=17:0
1313 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1314 'info_dict': {
1315 'id': 'Q39EVAstoRM',
1316 'ext': 'mp4',
1317 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1318 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1319 'upload_date': '20151107',
1320 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1321 'uploader': 'CH GAMER DROID',
1322 },
1323 'params': {
1324 'skip_download': True,
1325 },
be49068d 1326 'skip': 'This video does not exist.',
313dfc45 1327 },
201c1459 1328 {
1329 # Video with incomplete 'yt:stretch=16:'
1330 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1331 'only_matching': True,
1332 },
7caf9830
S
1333 {
1334 # Video licensed under Creative Commons
1335 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1336 'info_dict': {
1337 'id': 'M4gD1WSo5mA',
1338 'ext': 'mp4',
1339 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1340 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1341 'duration': 721,
7caf9830
S
1342 'upload_date': '20150127',
1343 'uploader_id': 'BerkmanCenter',
ec85ded8 1344 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1345 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1346 'license': 'Creative Commons Attribution license (reuse allowed)',
1347 },
1348 'params': {
1349 'skip_download': True,
1350 },
1351 },
fd050249
S
1352 {
1353 # Channel-like uploader_url
1354 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1355 'info_dict': {
1356 'id': 'eQcmzGIKrzg',
1357 'ext': 'mp4',
1358 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1359 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1360 'duration': 4060,
fd050249 1361 'upload_date': '20151119',
eb6793ba 1362 'uploader': 'Bernie Sanders',
fd050249 1363 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1364 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1365 'license': 'Creative Commons Attribution license (reuse allowed)',
1366 },
1367 'params': {
1368 'skip_download': True,
1369 },
1370 },
040ac686
S
1371 {
1372 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1373 'only_matching': True,
7f29cf54
S
1374 },
1375 {
067aa17e 1376 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1377 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1378 'only_matching': True,
6496ccb4
S
1379 },
1380 {
1381 # Rental video preview
1382 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1383 'info_dict': {
1384 'id': 'uGpuVWrhIzE',
1385 'ext': 'mp4',
1386 'title': 'Piku - Trailer',
1387 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1388 'upload_date': '20150811',
1389 'uploader': 'FlixMatrix',
1390 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1391 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1392 'license': 'Standard YouTube License',
1393 },
1394 'params': {
1395 'skip_download': True,
1396 },
eb6793ba 1397 'skip': 'This video is not available.',
022a5d66 1398 },
12afdc2a
S
1399 {
1400 # YouTube Red video with episode data
1401 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1402 'info_dict': {
1403 'id': 'iqKdEhx-dD4',
1404 'ext': 'mp4',
1405 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1406 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1407 'duration': 2085,
12afdc2a
S
1408 'upload_date': '20170118',
1409 'uploader': 'Vsauce',
1410 'uploader_id': 'Vsauce',
1411 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1412 'series': 'Mind Field',
1413 'season_number': 1,
1414 'episode_number': 1,
1415 },
1416 'params': {
1417 'skip_download': True,
1418 },
1419 'expected_warnings': [
1420 'Skipping DASH manifest',
1421 ],
1422 },
c7121fa7
S
1423 {
1424 # The following content has been identified by the YouTube community
1425 # as inappropriate or offensive to some audiences.
1426 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1427 'info_dict': {
1428 'id': '6SJNVb0GnPI',
1429 'ext': 'mp4',
1430 'title': 'Race Differences in Intelligence',
1431 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1432 'duration': 965,
1433 'upload_date': '20140124',
1434 'uploader': 'New Century Foundation',
1435 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1436 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1437 },
1438 'params': {
1439 'skip_download': True,
1440 },
545cc85d 1441 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1442 },
022a5d66
S
1443 {
1444 # itag 212
1445 'url': '1t24XAntNCY',
1446 'only_matching': True,
fd5c4aab
S
1447 },
1448 {
1449 # geo restricted to JP
1450 'url': 'sJL6WA-aGkQ',
1451 'only_matching': True,
1452 },
cd5a74a2
S
1453 {
1454 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1455 'only_matching': True,
1456 },
bc2ca1bb 1457 {
1458 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1459 'only_matching': True,
1460 },
1461 {
1462 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1463 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1464 'only_matching': True,
1465 },
825cd268
RA
1466 {
1467 # DRM protected
1468 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1469 'only_matching': True,
4fe54c12
S
1470 },
1471 {
1472 # Video with unsupported adaptive stream type formats
1473 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1474 'info_dict': {
1475 'id': 'Z4Vy8R84T1U',
1476 'ext': 'mp4',
1477 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1478 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1479 'duration': 433,
1480 'upload_date': '20130923',
1481 'uploader': 'Amelia Putri Harwita',
1482 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1483 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1484 'formats': 'maxcount:10',
1485 },
1486 'params': {
1487 'skip_download': True,
1488 'youtube_include_dash_manifest': False,
1489 },
5429d6a9 1490 'skip': 'not actual anymore',
5caabd3c 1491 },
1492 {
822b9d9c 1493 # Youtube Music Auto-generated description
5caabd3c 1494 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1495 'info_dict': {
1496 'id': 'MgNrAu2pzNs',
1497 'ext': 'mp4',
1498 'title': 'Voyeur Girl',
1499 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1500 'upload_date': '20190312',
5429d6a9
S
1501 'uploader': 'Stephen - Topic',
1502 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1503 'artist': 'Stephen',
1504 'track': 'Voyeur Girl',
1505 'album': 'it\'s too much love to know my dear',
1506 'release_date': '20190313',
1507 'release_year': 2019,
1508 },
1509 'params': {
1510 'skip_download': True,
1511 },
1512 },
66b48727
RA
1513 {
1514 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1515 'only_matching': True,
1516 },
011e75e6
S
1517 {
1518 # invalid -> valid video id redirection
1519 'url': 'DJztXj2GPfl',
1520 'info_dict': {
1521 'id': 'DJztXj2GPfk',
1522 'ext': 'mp4',
1523 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1524 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1525 'upload_date': '20090125',
1526 'uploader': 'Prochorowka',
1527 'uploader_id': 'Prochorowka',
1528 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1529 'artist': 'Panjabi MC',
1530 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1531 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1532 },
1533 'params': {
1534 'skip_download': True,
1535 },
545cc85d 1536 'skip': 'Video unavailable',
ea74e00b
DP
1537 },
1538 {
1539 # empty description results in an empty string
1540 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1541 'info_dict': {
1542 'id': 'x41yOUIvK2k',
1543 'ext': 'mp4',
1544 'title': 'IMG 3456',
1545 'description': '',
1546 'upload_date': '20170613',
1547 'uploader_id': 'ElevageOrVert',
1548 'uploader': 'ElevageOrVert',
1549 },
1550 'params': {
1551 'skip_download': True,
1552 },
1553 },
a0566bbf 1554 {
29f7c58a 1555 # with '};' inside yt initial data (see [1])
1556 # see [2] for an example with '};' inside ytInitialPlayerResponse
1557 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1558 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1559 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1560 'info_dict': {
1561 'id': 'CHqg6qOn4no',
1562 'ext': 'mp4',
1563 'title': 'Part 77 Sort a list of simple types in c#',
1564 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1565 'upload_date': '20130831',
1566 'uploader_id': 'kudvenkat',
1567 'uploader': 'kudvenkat',
1568 },
1569 'params': {
1570 'skip_download': True,
1571 },
1572 },
29f7c58a 1573 {
1574 # another example of '};' in ytInitialData
1575 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1576 'only_matching': True,
1577 },
1578 {
1579 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1580 'only_matching': True,
1581 },
545cc85d 1582 {
cc2db878 1583 # https://github.com/ytdl-org/youtube-dl/pull/28094
1584 'url': 'OtqTfy26tG0',
1585 'info_dict': {
1586 'id': 'OtqTfy26tG0',
1587 'ext': 'mp4',
1588 'title': 'Burn Out',
1589 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1590 'upload_date': '20141120',
1591 'uploader': 'The Cinematic Orchestra - Topic',
1592 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1593 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1594 'artist': 'The Cinematic Orchestra',
1595 'track': 'Burn Out',
1596 'album': 'Every Day',
1597 'release_data': None,
1598 'release_year': None,
1599 },
1600 'params': {
1601 'skip_download': True,
1602 },
545cc85d 1603 },
bc2ca1bb 1604 {
1605 # controversial video, only works with bpctr when authenticated with cookies
1606 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1607 'only_matching': True,
1608 },
f7ad7160 1609 {
1610 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1611 'url': 'cBvYw8_A0vQ',
1612 'info_dict': {
1613 'id': 'cBvYw8_A0vQ',
1614 'ext': 'mp4',
1615 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1616 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1617 'upload_date': '20201120',
1618 'uploader': 'Walk around Japan',
1619 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1620 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1621 },
1622 'params': {
1623 'skip_download': True,
1624 },
0fb983f6 1625 }, {
1626 # Has multiple audio streams
1627 'url': 'WaOKSUlf4TM',
1628 'only_matching': True
9297939e 1629 }, {
1630 # Requires Premium: has format 141 when requested using YTM url
1631 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1632 'only_matching': True
1633 }, {
120916da 1634 # multiple subtitles with same lang_code
1635 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1636 'only_matching': True,
109dd3b2 1637 }, {
1638 # Force use android client fallback
1639 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1640 'info_dict': {
1641 'id': 'YOelRv7fMxY',
1642 'title': 'Digging a Secret Tunnel from my Workshop',
1643 'ext': '3gp',
1644 'upload_date': '20210624',
1645 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1646 'uploader': 'colinfurze',
1647 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1648 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1649 },
1650 'params': {
1651 'format': '17', # 3gp format available on android
1652 'extractor_args': {'youtube': {'player_client': ['android']}},
1653 },
120916da 1654 },
109dd3b2 1655 {
1656 # Skip download of additional client configs (remix client config in this case)
1657 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1658 'only_matching': True,
1659 'params': {
1660 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1661 },
1662 }
2eb88d95
PH
1663 ]
1664
@classmethod
def suitable(cls, url):
    """
    Tell whether this extractor should handle url.

    URLs whose query string carries a non-empty 'list' parameter are
    rejected here; everything else is decided by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1674
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor with empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # (player_url, signature shape id) -> signature function, see _decrypt_signature
    self._player_cache = {}
    # player id -> downloaded player code, see _load_player
    self._code_cache = {}
e0df6211 1679
def _extract_player_url(self, ytcfg=None, webpage=None):
    """
    Return the absolute URL of the JS player, or None if it cannot be found.

    ytcfg['PLAYER_JS_URL'] is preferred; otherwise the webpage (when one
    is available) is searched for a PLAYER_JS_URL/jsUrl entry.
    Protocol-relative and site-relative URLs are made absolute.
    """
    player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if not player_url and webpage:
        # webpage may be missing: the caller downloads it non-fatally
        player_url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not player_url:
        # The non-fatal search found nothing; do not call str methods on None
        return None
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)
    return player_url
1692
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # The id is the dot-joined lengths of the dot-separated signature parts
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1696
e40c758c
S
1697 @classmethod
1698 def _extract_player_info(cls, player_url):
1699 for player_re in cls._PLAYER_INFO_RE:
1700 id_m = re.search(player_re, player_url)
1701 if id_m:
1702 break
1703 else:
c081b35c 1704 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1705 return id_m.group('id')
e40c758c 1706
def _load_player(self, video_id, player_url, fatal=True) -> bool:
    """
    Make sure the code of the player at player_url is in self._code_cache.

    Returns True when the code is cached under its player id afterwards,
    False when it could not be downloaded (only possible with fatal=False).
    """
    player_id = self._extract_player_info(player_url)
    if player_id not in self._code_cache:
        code = self._download_webpage(
            player_url, video_id, fatal=fatal,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
        # Only cache a real result: storing a failed (falsy) download would
        # make this method report success and hand callers a non-string as code
        if code:
            self._code_cache[player_id] = code
    return player_id in self._code_cache
1715
def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a callable that descrambles signatures shaped like example_sig.

    A cached index list from the 'youtube-sigfuncs' cache is used when
    present; otherwise the function is parsed from the player code and its
    index list is stored in that cache. Returns None if the player code
    could not be loaded.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cached_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        return lambda s: ''.join(s[i] for i in cached_spec)

    if not self._load_player(video_id, player_url):
        return None

    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Run the function on a string whose n-th character has code point n,
    # so the output's code points are the source index of each character
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
83799698 1738
def _print_sig_code(self, func, example_sig):
    """
    Print Python source equivalent to the signature function func.

    func is run on a probe string as long as example_sig; the resulting
    index sequence is rendered as a sum of indexing and slicing expressions.
    """
    def slice_expr(first, last, stride):
        # Render the run first..last (inclusive) as a Python slice of s
        lower = '' if first == 0 else str(first)
        upper = ':' if last + stride < 0 else ':%d' % (last + stride)
        stride_part = '' if stride == 1 else ':%d' % stride
        return 's[%s%s%s]' % (lower, upper, stride_part)

    def gen_sig_code(idxs):
        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for prev, i in zip(idxs[:-1], idxs[1:]):
            delta = i - prev
            if step is not None:
                # Inside a run: emit it as soon as the stride changes
                if delta != step:
                    yield slice_expr(start, prev, step)
                    step = None
                continue
            if delta in [-1, 1]:
                # A new ascending/descending run starts at prev
                step, start = delta, prev
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield slice_expr(start, i, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1777
e0df6211
PH
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player code jscode and return a
    Python callable that applies it to a single string.
    """
    # Tried in order; the name is captured by the 'sig' group
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)
    # The extracted function is called with its argument list
    return lambda s: initial_function([s])
1801
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""

    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # One function is kept per player URL and signature shape
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        # Any failure is re-raised as ExtractorError carrying the full traceback
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1823
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ytcfg['STS'] is used when it yields a truthy value; otherwise the
    value is searched for in the player code.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return
    if not self._load_player(video_id, player_url, fatal=fatal):
        return sts
    code = self._code_cache[self._extract_player_info(player_url)]
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
1848
def _mark_watched(self, video_id, player_response):
    """
    Request the playback tracking URL found in player_response.

    Nothing is requested when the response carries no valid URL; a failed
    request is not fatal.
    """
    tracking_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not tracking_url:
        return
    parsed_tracking_url = compat_urlparse.urlparse(tracking_url)
    qs = compat_urlparse.parse_qs(parsed_tracking_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    qs['ver'] = ['2']
    qs['cpn'] = [''.join(cpn_chars)]
    new_query = compat_urllib_parse_urlencode(qs, True)
    tracking_url = compat_urlparse.urlunparse(
        parsed_tracking_url._replace(query=new_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1873
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """
    Return the list of YouTube embeds found in webpage.

    Entries are embed URLs for players included through markup or SWF
    helpers, and the raw attribute values for lazyYT and the Wordpress
    "YouTube Video Importer" plugin.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read 'embedSWF\(?:\s*', which
    # demands a literal ':' and so never matched 'embedSWF("...")'
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(video_id)
        for video_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the video id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
1905
@staticmethod
def _extract_url(webpage):
    """Return the first entry of _extract_urls(webpage), or None if there is none."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)
1910
97665381
PH
@classmethod
def extract_id(cls, url):
    """
    Return the video id, i.e. the second group of _VALID_URL matched against url.

    Raises ExtractorError when url does not match.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1918
def _extract_chapters_from_json(self, data, video_id, duration):
    """
    Build the chapter list from the chaptered player bar in data.

    Each chapter ends where the next one starts; the last one ends at
    duration. Chapters without a start or end time are left out. Returns
    None when data carries no chapters.
    """
    chapters_list = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not chapters_list:
        return

    def chapter_time(chapter):
        # Start of the chapter in seconds (the source value is in milliseconds)
        start_ms = try_get(
            chapter,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(start_ms, scale=1000)

    start_times = [chapter_time(chapter) for chapter in chapters_list]
    end_times = start_times[1:] + [duration]

    chapters = []
    for chapter, start_time, end_time in zip(chapters_list, start_times, end_times):
        if start_time is None or end_time is None:
            continue
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return chapters
1958
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """
    Find the JSON matched by regex in webpage and parse it (non-fatally).

    The boundary-anchored form of regex is tried first, then regex alone;
    '{}' is parsed when neither matches.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    json_string = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(json_string, video_id, fatal=False)
84213ea8 1963
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the result of datetime_from_str for 'now-<X><units>', or None
    when time_text is missing or has fewer than three space-separated words.
    """
    if not time_text:
        # The time text can legitimately be absent (None or empty)
        return None
    time_text_split = time_text.split(' ')
    if len(time_text_split) < 3:
        return None
    return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1973
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """
    Concatenate the non-empty 'text' strings of the dict entries in runs.

    Returns None when there is no text at all.
    """
    fragments = []
    for run in runs:
        if isinstance(run, dict):
            fragment = try_get(run, lambda x: x['text'], compat_str)
            if fragment:
                fragments.append(fragment)
    return ''.join(fragments) if fragments else None
1987
def _extract_comment(self, comment_renderer, parent=None):
    """
    Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'. 'timestamp' is None
    when the published time text is missing or cannot be split into
    '<X> <units> ago'. 'parent' is 'root' for top-level comments.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)
    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp = None
    # time_text is None when there are no runs, and parse_time_text returns
    # None for short texts; neither case may reach .timetuple()
    time_text_dt = self.parse_time_text(time_text) if time_text else None
    if time_text_dt is not None:
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    # Favorited is read off the presence of 'creatorHeart' among the action buttons
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2022
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """
    Generator over the comments reachable from root_continuation_data.

    Yields comment dicts (see _extract_comment). For the top-level call
    (parent is None) it may also yield an int: the expected total number
    of comments taken from the comments header. Replies are fetched by
    calling this method recursively with parent set to the comment id.

    comment_counts is a shared mutable list:
    [comments so far, estimated total, current reply thread number].
    """

    def extract_header(contents):
        # Returns (expected total, continuation for the chosen sort order)
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = try_get(comments_header_renderer,
                                             (lambda x: x['countText']['runs'][0]['text'],
                                              lambda x: x['commentsCount']['runs'][0]['text']),
                                             compat_str)
            if expected_comment_count:
                comment_counts[1] = str_to_int(expected_comment_count)
                self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        # Yields every comment in contents, each followed by its replies
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # dict is the expected type; it used to sit inside the getter
            # tuple, where it acted as a getter returning a copy of the input
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['ctoken']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_continuation_query(continuation_token, None)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=self._continuation_query_ajax_to_api(continuation),
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # Counting from 1 so that 0 really means "no entries"; a
                # zero-based index cannot tell one entry from none
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items), 1):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                # (counting from 1, see the note in the branch above)
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or []), 1):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2194
2d6659b9 2195 @staticmethod
2196 def _generate_comment_continuation(video_id):
2197 """
2198 Generates initial comment section continuation token from given video id
2199 """
2200 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2201 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2202 new_continuation_intlist = list(itertools.chain.from_iterable(
2203 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2204 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2205
2206 def _extract_comments(self, ytcfg, video_id, contents, webpage):
a1c5d2ca 2207 """Entry for comment extraction"""
2d6659b9 2208 def _real_comment_extract(contents):
2209 if isinstance(contents, list):
2210 for entry in contents:
2211 for key, renderer in entry.items():
2212 if key not in known_entry_comment_renderers:
2213 continue
2214 yield from self._comment_entries(
2215 renderer, video_id=video_id, ytcfg=ytcfg,
2216 identity_token=self._extract_identity_token(webpage, item_id=video_id),
2217 account_syncid=self._extract_account_syncid(ytcfg))
2218 break
a1c5d2ca 2219 comments = []
2d6659b9 2220 known_entry_comment_renderers = ('itemSectionRenderer',)
a1c5d2ca 2221 estimated_total = 0
2d6659b9 2222 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
a1c5d2ca 2223
2d6659b9 2224 try:
2225 for comment in _real_comment_extract(contents):
2226 if len(comments) >= max_comments:
2227 break
2228 if isinstance(comment, int):
2229 estimated_total = comment
2230 continue
2231 comments.append(comment)
2232 except KeyboardInterrupt:
2233 self.to_screen('Interrupted by user')
d92f5d5a 2234 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
a1c5d2ca
M
2235 return {
2236 'comments': comments,
2237 'comment_count': len(comments),
2238 }
2239
109dd3b2 2240 @staticmethod
2241 def _generate_player_context(sts=None):
2242 context = {
2243 'html5Preference': 'HTML5_PREF_WANTS',
2244 }
2245 if sts is not None:
2246 context['signatureTimestamp'] = sts
2247 return {
2248 'playbackContext': {
2249 'contentPlaybackContext': context
2250 }
2251 }
2252
4e6767b5 2253 @staticmethod
c888ffb9 2254 def _get_video_info_params(video_id, client='TVHTML5'):
2255 GVI_CLIENTS = {
2256 'ANDROID': {
2257 'c': 'ANDROID',
2258 'cver': '16.20',
2259 },
2260 'TVHTML5': {
2261 'c': 'TVHTML5',
2262 'cver': '6.20180913',
2263 }
2264 }
2265 query = {
4e6767b5 2266 'video_id': video_id,
2267 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c888ffb9 2268 'html5': '1'
4e6767b5 2269 }
c888ffb9 2270 query.update(GVI_CLIENTS.get(client))
2271 return query
4e6767b5 2272
c5e8d7af 2273 def _real_extract(self, url):
cf7e015f 2274 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 2275 video_id = self._match_id(url)
9297939e 2276
2277 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2278
545cc85d 2279 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 2280 webpage_url = base_url + 'watch?v=' + video_id
2281 webpage = self._download_webpage(
cce889b9 2282 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 2283
109dd3b2 2284 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2285 identity_token = self._extract_identity_token(webpage, video_id)
2286 syncid = self._extract_account_syncid(ytcfg)
2287 headers = self._generate_api_headers(ytcfg, identity_token, syncid)
2288
2289 player_url = self._extract_player_url(ytcfg, webpage)
2290
2d6659b9 2291 player_client = self._configuration_arg('player_client', [''])[0]
4bb6b02f 2292 if player_client not in ('web', 'android', ''):
c888ffb9 2293 self.report_warning(f'Invalid player_client {player_client} given. Falling back to android client.')
2294 force_mobile_client = player_client != 'web'
4bb6b02f 2295 player_skip = self._configuration_arg('player_skip')
109dd3b2 2296
9297939e 2297 def get_text(x):
2298 if not x:
2299 return
2300 text = x.get('simpleText')
2301 if text and isinstance(text, compat_str):
2302 return text
2303 runs = x.get('runs')
2304 if not isinstance(runs, list):
2305 return
2306 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
2307
2308 ytm_streaming_data = {}
2309 if is_music_url:
109dd3b2 2310 ytm_webpage = None
2311 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2312 if sts and not force_mobile_client and 'configs' not in player_skip:
2313 ytm_webpage = self._download_webpage(
2314 'https://music.youtube.com',
2d6659b9 2315 video_id, fatal=False, note='Downloading remix client config')
109dd3b2 2316
2317 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2318 ytm_client = 'WEB_REMIX'
2319 if not sts or force_mobile_client:
2320 # Android client already has signature descrambled
2321 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2322 if not sts:
c888ffb9 2323 self.report_warning('Falling back to android remix client for player API.')
109dd3b2 2324 ytm_client = 'ANDROID_MUSIC'
2325 ytm_cfg = {}
2326
2327 ytm_headers = self._generate_api_headers(
2328 ytm_cfg, identity_token, syncid,
2329 client=ytm_client)
2330 ytm_query = {'videoId': video_id}
2331 ytm_query.update(self._generate_player_context(sts))
2332
2333 ytm_player_response = self._extract_response(
2334 item_id=video_id, ep='player', query=ytm_query,
2335 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2336 default_client=ytm_client,
c888ffb9 2337 note='Downloading %sremix player API JSON' % ('android ' if force_mobile_client else ''))
2d6659b9 2338 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}
109dd3b2 2339
545cc85d 2340 player_response = None
2341 if webpage:
2342 player_response = self._extract_yt_initial_variable(
2343 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2344 video_id, 'initial player response')
f4f751af 2345
109dd3b2 2346 if not player_response or force_mobile_client:
2347 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2348 yt_client = 'WEB'
2349 ytpcfg = ytcfg
2350 ytp_headers = headers
2351 if not sts or force_mobile_client:
2352 # Android client already has signature descrambled
2353 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2354 if not sts:
c888ffb9 2355 self.report_warning('Falling back to android client for player API.')
109dd3b2 2356 yt_client = 'ANDROID'
2357 ytpcfg = {}
2358 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid, yt_client)
2359
2360 yt_query = {'videoId': video_id}
2361 yt_query.update(self._generate_player_context(sts))
2362 player_response = self._extract_response(
2363 item_id=video_id, ep='player', query=yt_query,
2364 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2365 default_client=yt_client,
c888ffb9 2366 note='Downloading %splayer API JSON' % ('android ' if force_mobile_client else '')
2367 ) or player_response
545cc85d 2368
109dd3b2 2369 # Age-gate workarounds
545cc85d 2370 playability_status = player_response.get('playabilityStatus') or {}
109dd3b2 2371 if playability_status.get('reason') in self._AGE_GATE_REASONS:
c888ffb9 2372 gvi_clients = ('ANDROID', 'TVHTML5') if force_mobile_client else ('TVHTML5', 'ANDROID')
2373 for gvi_client in gvi_clients:
2374 pr = self._parse_json(try_get(compat_parse_qs(
2375 self._download_webpage(
2376 base_url + 'get_video_info', video_id,
2377 'Refetching age-gated %s info webpage' % gvi_client.lower(),
2378 'unable to download video info webpage', fatal=False,
2379 query=self._get_video_info_params(video_id, client=gvi_client))),
2380 lambda x: x['player_response'][0],
2381 compat_str) or '{}', video_id)
2382 if pr:
2383 break
109dd3b2 2384 if not pr:
2385 self.report_warning('Falling back to embedded-only age-gate workaround.')
2386 embed_webpage = None
2387 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2388 if sts and not force_mobile_client and 'configs' not in player_skip:
2389 embed_webpage = self._download_webpage(
2390 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2391 video_id=video_id, note='Downloading age-gated embed config')
2392
2393 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2394 # If we extracted the embed webpage, it'll tell us if we can view the video
2395 embedded_pr = self._parse_json(
2396 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2397 video_id=video_id)
2398 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2399 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2400 yt_client = 'WEB_EMBEDDED_PLAYER'
2401 if not sts or force_mobile_client:
2402 # Android client already has signature descrambled
2403 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2404 if not sts:
2405 self.report_warning(
c888ffb9 2406 'Falling back to android embedded client for player API (note: some formats may be missing).')
109dd3b2 2407 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2408 ytcfg_age = {}
2409
2410 ytage_headers = self._generate_api_headers(
2411 ytcfg_age, identity_token, syncid, client=yt_client)
2412 yt_age_query = {'videoId': video_id}
2413 yt_age_query.update(self._generate_player_context(sts))
2414 pr = self._extract_response(
2415 item_id=video_id, ep='player', query=yt_age_query,
2416 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2417 default_client=yt_client,
c888ffb9 2418 note='Downloading %sage-gated player API JSON' % ('android ' if force_mobile_client else '')
109dd3b2 2419 ) or {}
2420
545cc85d 2421 if pr:
2422 player_response = pr
2423
2424 trailer_video_id = try_get(
2425 playability_status,
2426 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2427 compat_str)
2428 if trailer_video_id:
2429 return self.url_result(
2430 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 2431
545cc85d 2432 search_meta = (
2433 lambda x: self._html_search_meta(x, webpage, default=None)) \
2434 if webpage else lambda x: None
dbdaaa23 2435
545cc85d 2436 video_details = player_response.get('videoDetails') or {}
37357d21 2437 microformat = try_get(
545cc85d 2438 player_response,
2439 lambda x: x['microformat']['playerMicroformatRenderer'],
2440 dict) or {}
2441 video_title = video_details.get('title') \
2442 or get_text(microformat.get('title')) \
2443 or search_meta(['og:title', 'twitter:title', 'title'])
2444 video_description = video_details.get('shortDescription')
cf7e015f 2445
8fe10494 2446 if not smuggled_data.get('force_singlefeed', False):
a06916d9 2447 if not self.get_param('noplaylist'):
8fe10494
S
2448 multifeed_metadata_list = try_get(
2449 player_response,
2450 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 2451 compat_str)
8fe10494
S
2452 if multifeed_metadata_list:
2453 entries = []
2454 feed_ids = []
2455 for feed in multifeed_metadata_list.split(','):
2456 # Unquote should take place before split on comma (,) since textual
2457 # fields may contain comma as well (see
067aa17e 2458 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 2459 feed_data = compat_parse_qs(
2460 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
2461
def feed_entry(name):
    """Return the first value stored under ``name`` in the enclosing ``feed_data``.

    The lookup goes through try_get() with compat_str as the expected type.
    """
    def first_value(parsed):
        return parsed[name][0]

    return try_get(feed_data, first_value, compat_str)
6b09401b
S
2465
2466 feed_id = feed_entry('id')
2467 if not feed_id:
2468 continue
2469 feed_title = feed_entry('title')
2470 title = video_title
2471 if feed_title:
2472 title += ' (%s)' % feed_title
8fe10494
S
2473 entries.append({
2474 '_type': 'url_transparent',
2475 'ie_key': 'Youtube',
2476 'url': smuggle_url(
545cc85d 2477 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 2478 {'force_singlefeed': True}),
6b09401b 2479 'title': title,
8fe10494 2480 })
6b09401b 2481 feed_ids.append(feed_id)
8fe10494
S
2482 self.to_screen(
2483 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2484 % (', '.join(feed_ids), video_id))
545cc85d 2485 return self.playlist_result(
2486 entries, video_id, video_title, video_description)
8fe10494
S
2487 else:
2488 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2489
9297939e 2490 formats, itags, stream_ids = [], [], []
cc2db878 2491 itag_qualities = {}
d3fc8074 2492 q = qualities([
60bdb7bd 2493 # "tiny" is the smallest video-only format. But some audio-only formats
2494 # were also labeled "tiny". It is not clear if such formats still exist
d3fc8074 2495 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2496 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2497 ])
9297939e 2498
545cc85d 2499 streaming_data = player_response.get('streamingData') or {}
2500 streaming_formats = streaming_data.get('formats') or []
2501 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2502 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2503 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2504
545cc85d 2505 for fmt in streaming_formats:
2506 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2507 continue
321bf820 2508
cc2db878 2509 itag = str_or_none(fmt.get('itag'))
9297939e 2510 audio_track = fmt.get('audioTrack') or {}
2511 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2512 if stream_id in stream_ids:
2513 continue
2514
cc2db878 2515 quality = fmt.get('quality')
d3fc8074 2516 if quality == 'tiny' or not quality:
2517 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2518 if itag and quality:
2519 itag_qualities[itag] = quality
2520 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2521 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2522 # number of fragments that would subsequently be requested with (`&sq=N`)
2523 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2524 continue
2525
545cc85d 2526 fmt_url = fmt.get('url')
2527 if not fmt_url:
2528 sc = compat_parse_qs(fmt.get('signatureCipher'))
2529 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2530 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2531 if not (sc and fmt_url and encrypted_sig):
2532 continue
545cc85d 2533 if not player_url:
201e9eaa 2534 continue
545cc85d 2535 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2536 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2537 fmt_url += '&' + sp + '=' + signature
2538
545cc85d 2539 if itag:
2540 itags.append(itag)
9297939e 2541 stream_ids.append(stream_id)
2542
cc2db878 2543 tbr = float_or_none(
2544 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2545 dct = {
2546 'asr': int_or_none(fmt.get('audioSampleRate')),
2547 'filesize': int_or_none(fmt.get('contentLength')),
2548 'format_id': itag,
0fb983f6 2549 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2550 'fps': int_or_none(fmt.get('fps')),
2551 'height': int_or_none(fmt.get('height')),
dca3ff4a 2552 'quality': q(quality),
cc2db878 2553 'tbr': tbr,
545cc85d 2554 'url': fmt_url,
2555 'width': fmt.get('width'),
0fb983f6 2556 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2557 }
60bdb7bd 2558 mime_mobj = re.match(
2559 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2560 if mime_mobj:
2561 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2562 dct.update(parse_codecs(mime_mobj.group(2)))
2563 # The 3gp format in android client has a quality of "small",
2564 # but is actually worse than all other formats
2565 if dct['ext'] == '3gp':
2566 dct['quality'] = q('tiny')
cc2db878 2567 no_audio = dct.get('acodec') == 'none'
2568 no_video = dct.get('vcodec') == 'none'
2569 if no_audio:
2570 dct['vbr'] = tbr
2571 if no_video:
2572 dct['abr'] = tbr
2573 if no_audio or no_video:
545cc85d 2574 dct['downloader_options'] = {
2575 # Youtube throttles chunks >~10M
2576 'http_chunk_size': 10485760,
bf1317d2 2577 }
7c60c33e 2578 if dct.get('ext'):
2579 dct['container'] = dct['ext'] + '_dash'
545cc85d 2580 formats.append(dct)
2581
4bb6b02f 2582 skip_manifests = self._configuration_arg('skip')
5d3a0e79 2583 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2584 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2585
9297939e 2586 for sd in (streaming_data, ytm_streaming_data):
5d3a0e79 2587 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
9297939e 2588 if hls_manifest_url:
2589 for f in self._extract_m3u8_formats(
2590 hls_manifest_url, video_id, 'mp4', fatal=False):
2591 itag = self._search_regex(
2592 r'/itag/(\d+)', f['url'], 'itag', default=None)
2593 if itag:
2594 f['format_id'] = itag
8d68ab98 2595 formats.append(f)
545cc85d 2596
5d3a0e79 2597 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2598 if dash_manifest_url:
2599 for f in self._extract_mpd_formats(
2600 dash_manifest_url, video_id, fatal=False):
2601 itag = f['format_id']
2602 if itag in itags:
2603 continue
2604 if itag in itag_qualities:
2605 f['quality'] = q(itag_qualities[itag])
2606 filesize = int_or_none(self._search_regex(
2607 r'/clen/(\d+)', f.get('fragment_base_url')
2608 or f['url'], 'file size', default=None))
2609 if filesize:
2610 f['filesize'] = filesize
2611 formats.append(f)
bf1317d2 2612
545cc85d 2613 if not formats:
a06916d9 2614 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2615 self.raise_no_formats(
545cc85d 2616 'This video is DRM protected.', expected=True)
2617 pemr = try_get(
2618 playability_status,
2619 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2620 dict) or {}
2621 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2622 subreason = pemr.get('subreason')
2623 if subreason:
2624 subreason = clean_html(get_text(subreason))
2625 if subreason == 'The uploader has not made this video available in your country.':
2626 countries = microformat.get('availableCountries')
2627 if not countries:
2628 regions_allowed = search_meta('regionsAllowed')
2629 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2630 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2631 reason += '\n' + subreason
2632 if reason:
b7da73eb 2633 self.raise_no_formats(reason, expected=True)
bf1317d2 2634
545cc85d 2635 self._sort_formats(formats)
bf1317d2 2636
545cc85d 2637 keywords = video_details.get('keywords') or []
2638 if not keywords and webpage:
2639 keywords = [
2640 unescapeHTML(m.group('content'))
2641 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2642 for keyword in keywords:
2643 if keyword.startswith('yt:stretch='):
201c1459 2644 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2645 if mobj:
2646 # NB: float is intentional for forcing float division
2647 w, h = (float(v) for v in mobj.groups())
2648 if w > 0 and h > 0:
2649 ratio = w / h
2650 for f in formats:
2651 if f.get('vcodec') != 'none':
2652 f['stretched_ratio'] = ratio
2653 break
6449cd80 2654
545cc85d 2655 thumbnails = []
2656 for container in (video_details, microformat):
2657 for thumbnail in (try_get(
2658 container,
2659 lambda x: x['thumbnail']['thumbnails'], list) or []):
2660 thumbnail_url = thumbnail.get('url')
2661 if not thumbnail_url:
bf1317d2 2662 continue
1988fab7 2663 # Sometimes youtube gives a wrong thumbnail URL. See:
2664 # https://github.com/yt-dlp/yt-dlp/issues/233
2665 # https://github.com/ytdl-org/youtube-dl/issues/28023
2666 if 'maxresdefault' in thumbnail_url:
2667 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2668 thumbnails.append({
545cc85d 2669 'url': thumbnail_url,
ff2751ac 2670 'height': int_or_none(thumbnail.get('height')),
545cc85d 2671 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2672 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2673 })
ff2751ac 2674 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2675 if thumbnail_url:
2676 thumbnails.append({
2677 'url': thumbnail_url,
2678 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2679 })
2680 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2681 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2682 thumbnails.append({
2683 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2684 'preference': 1,
2685 })
2686 self._remove_duplicate_formats(thumbnails)
545cc85d 2687
2688 category = microformat.get('category') or search_meta('genre')
2689 channel_id = video_details.get('channelId') \
2690 or microformat.get('externalChannelId') \
2691 or search_meta('channelId')
2692 duration = int_or_none(
2693 video_details.get('lengthSeconds')
2694 or microformat.get('lengthSeconds')) \
2695 or parse_duration(search_meta('duration'))
2696 is_live = video_details.get('isLive')
f6745c49 2697 is_upcoming = video_details.get('isUpcoming')
545cc85d 2698 owner_profile_url = microformat.get('ownerProfileUrl')
2699
2700 info = {
2701 'id': video_id,
2702 'title': self._live_title(video_title) if is_live else video_title,
2703 'formats': formats,
2704 'thumbnails': thumbnails,
2705 'description': video_description,
2706 'upload_date': unified_strdate(
2707 microformat.get('uploadDate')
2708 or search_meta('uploadDate')),
2709 'uploader': video_details['author'],
2710 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2711 'uploader_url': owner_profile_url,
2712 'channel_id': channel_id,
2713 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2714 'duration': duration,
2715 'view_count': int_or_none(
2716 video_details.get('viewCount')
2717 or microformat.get('viewCount')
2718 or search_meta('interactionCount')),
2719 'average_rating': float_or_none(video_details.get('averageRating')),
2720 'age_limit': 18 if (
2721 microformat.get('isFamilySafe') is False
2722 or search_meta('isFamilyFriendly') == 'false'
2723 or search_meta('og:restrictions:age') == '18+') else 0,
2724 'webpage_url': webpage_url,
2725 'categories': [category] if category else None,
2726 'tags': keywords,
2727 'is_live': is_live,
2728 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2729 'was_live': video_details.get('isLiveContent'),
545cc85d 2730 }
b477fc13 2731
545cc85d 2732 pctr = try_get(
2733 player_response,
2734 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2735 subtitles = {}
2736 if pctr:
def process_language(container, base_url, lang_code, sub_name, query):
    """Append one subtitle entry per format in self._SUBTITLE_FORMATS.

    Entries are added to the list stored under ``lang_code`` in
    ``container`` (created if absent). Each entry's URL is ``base_url``
    updated with ``query`` plus the current ``fmt``.

    Note that ``query`` is modified in place: its ``fmt`` key is
    overwritten on every iteration.
    """
    tracks = container.setdefault(lang_code, [])
    for sub_ext in self._SUBTITLE_FORMATS:
        query['fmt'] = sub_ext
        track_url = update_url_query(base_url, query)
        tracks.append({'ext': sub_ext, 'url': track_url, 'name': sub_name})
7e72694b 2748
545cc85d 2749 for caption_track in (pctr.get('captionTracks') or []):
2750 base_url = caption_track.get('baseUrl')
2751 if not base_url:
2752 continue
2753 if caption_track.get('kind') != 'asr':
120916da 2754 lang_code = (
2755 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2756 or caption_track.get('languageCode'))
545cc85d 2757 if not lang_code:
2758 continue
2759 process_language(
774d79cc 2760 subtitles, base_url, lang_code,
2d6659b9 2761 try_get(caption_track, lambda x: x['name']['simpleText']),
774d79cc 2762 {})
545cc85d 2763 continue
2764 automatic_captions = {}
2765 for translation_language in (pctr.get('translationLanguages') or []):
2766 translation_language_code = translation_language.get('languageCode')
2767 if not translation_language_code:
2768 continue
2769 process_language(
2770 automatic_captions, base_url, translation_language_code,
49c258e1 2771 try_get(translation_language, (
2772 lambda x: x['languageName']['simpleText'],
2773 lambda x: x['languageName']['runs'][0]['text'])),
545cc85d 2774 {'tlang': translation_language_code})
2775 info['automatic_captions'] = automatic_captions
2776 info['subtitles'] = subtitles
7e72694b 2777
545cc85d 2778 parsed_url = compat_urllib_parse_urlparse(url)
2779 for component in [parsed_url.fragment, parsed_url.query]:
2780 query = compat_parse_qs(component)
2781 for k, v in query.items():
2782 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2783 d_k += '_time'
2784 if d_k not in info and k in s_ks:
2785 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2786
2787 # Youtube Music Auto-generated description
822b9d9c 2788 if video_description:
38d70284 2789 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2790 if mobj:
822b9d9c
RA
2791 release_year = mobj.group('release_year')
2792 release_date = mobj.group('release_date')
2793 if release_date:
2794 release_date = release_date.replace('-', '')
2795 if not release_year:
545cc85d 2796 release_year = release_date[:4]
2797 info.update({
2798 'album': mobj.group('album'.strip()),
2799 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2800 'track': mobj.group('track').strip(),
2801 'release_date': release_date,
cc2db878 2802 'release_year': int_or_none(release_year),
545cc85d 2803 })
7e72694b 2804
545cc85d 2805 initial_data = None
2806 if webpage:
2807 initial_data = self._extract_yt_initial_variable(
2808 webpage, self._YT_INITIAL_DATA_RE, video_id,
2809 'yt initial data')
2810 if not initial_data:
109dd3b2 2811 initial_data = self._extract_response(
2812 item_id=video_id, ep='next', fatal=False,
2813 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2814 note='Downloading initial data API JSON')
545cc85d 2815
c60ee3a2 2816 try:
2817 # This will error if there is no livechat
2818 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2819 info['subtitles']['live_chat'] = [{
2820 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2821 'video_id': video_id,
2822 'ext': 'json',
f6745c49 2823 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 2824 }]
2825 except (KeyError, IndexError, TypeError):
2826 pass
545cc85d 2827
2828 if initial_data:
2829 chapters = self._extract_chapters_from_json(
2830 initial_data, video_id, duration)
2831 if not chapters:
2832 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2833 contents = try_get(
2834 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2835 list)
2836 if not contents:
2837 continue
2838
def chapter_time(mmlir):
    """Return parse_duration() of a chapter item's ``timeDescription`` text.

    ``mmlir`` is expected to be a ``macroMarkersListItemRenderer`` dict.
    The caller's look-ahead for the next chapter goes through try_get(),
    which may hand over None; that case (and any other non-dict value) is
    treated as an item without a time description rather than raising
    AttributeError.
    """
    if not isinstance(mmlir, dict):
        mmlir = {}
    # get_text() returns None for a missing description; the caller skips
    # chapters whose start or end time comes back as None.
    return parse_duration(get_text(mmlir.get('timeDescription')))
2842
2843 chapters = []
2844 for next_num, content in enumerate(contents, start=1):
2845 mmlir = content.get('macroMarkersListItemRenderer') or {}
2846 start_time = chapter_time(mmlir)
2847 end_time = chapter_time(try_get(
2848 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2849 if next_num < len(contents) else duration
2850 if start_time is None or end_time is None:
2851 continue
2852 chapters.append({
2853 'start_time': start_time,
2854 'end_time': end_time,
2855 'title': get_text(mmlir.get('title')),
2856 })
2857 if chapters:
2858 break
2859 if chapters:
2860 info['chapters'] = chapters
2861
2862 contents = try_get(
2863 initial_data,
2864 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2865 list) or []
2866 for content in contents:
2867 vpir = content.get('videoPrimaryInfoRenderer')
2868 if vpir:
2869 stl = vpir.get('superTitleLink')
2870 if stl:
2871 stl = get_text(stl)
2872 if try_get(
2873 vpir,
2874 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2875 info['location'] = stl
2876 else:
2877 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2878 if mobj:
2879 info.update({
2880 'series': mobj.group(1),
2881 'season_number': int(mobj.group(2)),
2882 'episode_number': int(mobj.group(3)),
2883 })
2884 for tlb in (try_get(
2885 vpir,
2886 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2887 list) or []):
2888 tbr = tlb.get('toggleButtonRenderer') or {}
2889 for getter, regex in [(
2890 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2891 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2892 lambda x: x['accessibility'],
2893 lambda x: x['accessibilityData']['accessibilityData'],
2894 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2895 label = (try_get(tbr, getter, dict) or {}).get('label')
2896 if label:
2897 mobj = re.match(regex, label)
2898 if mobj:
2899 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2900 break
2901 sbr_tooltip = try_get(
2902 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2903 if sbr_tooltip:
2904 like_count, dislike_count = sbr_tooltip.split(' / ')
2905 info.update({
2906 'like_count': str_to_int(like_count),
2907 'dislike_count': str_to_int(dislike_count),
2908 })
2909 vsir = content.get('videoSecondaryInfoRenderer')
2910 if vsir:
2911 info['channel'] = get_text(try_get(
2912 vsir,
2913 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2914 dict))
545cc85d 2915 rows = try_get(
2916 vsir,
2917 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2918 list) or []
2919 multiple_songs = False
2920 for row in rows:
2921 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2922 multiple_songs = True
2923 break
2924 for row in rows:
2925 mrr = row.get('metadataRowRenderer') or {}
2926 mrr_title = mrr.get('title')
2927 if not mrr_title:
2928 continue
2929 mrr_title = get_text(mrr['title'])
2930 mrr_contents_text = get_text(mrr['contents'][0])
2931 if mrr_title == 'License':
2932 info['license'] = mrr_contents_text
2933 elif not multiple_songs:
2934 if mrr_title == 'Album':
2935 info['album'] = mrr_contents_text
2936 elif mrr_title == 'Artist':
2937 info['artist'] = mrr_contents_text
2938 elif mrr_title == 'Song':
2939 info['track'] = mrr_contents_text
2940
2941 fallbacks = {
2942 'channel': 'uploader',
2943 'channel_id': 'uploader_id',
2944 'channel_url': 'uploader_url',
2945 }
2946 for to, frm in fallbacks.items():
2947 if not info.get(to):
2948 info[to] = info.get(frm)
2949
2950 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2951 v = info.get(s_k)
2952 if v:
2953 info[d_k] = v
b84071c0 2954
c224251a
M
2955 is_private = bool_or_none(video_details.get('isPrivate'))
2956 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2957 is_membersonly = None
b28f8d24 2958 is_premium = None
c224251a
M
2959 if initial_data and is_private is not None:
2960 is_membersonly = False
b28f8d24 2961 is_premium = False
c224251a
M
2962 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2963 for content in contents or []:
2964 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2965 for badge in badges or []:
2966 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2967 if label.lower() == 'members only':
2968 is_membersonly = True
2969 break
b28f8d24
M
2970 elif label.lower() == 'premium':
2971 is_premium = True
2972 break
2973 if is_membersonly or is_premium:
c224251a
M
2974 break
2975
2976 # TODO: Add this for playlists
2977 info['availability'] = self._availability(
2978 is_private=is_private,
b28f8d24 2979 needs_premium=is_premium,
c224251a
M
2980 needs_subscription=is_membersonly,
2981 needs_auth=info['age_limit'] >= 18,
2982 is_unlisted=None if is_private is None else is_unlisted)
2983
06167fbb 2984 # get xsrf for annotations or comments
a06916d9 2985 get_annotations = self.get_param('writeannotations', False)
2986 get_comments = self.get_param('getcomments', False)
06167fbb 2987 if get_annotations or get_comments:
29f7c58a 2988 xsrf_token = None
545cc85d 2989 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2990 if ytcfg:
2991 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2992 if not xsrf_token:
2993 xsrf_token = self._search_regex(
2994 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2995 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2996
2997 # annotations
06167fbb 2998 if get_annotations:
64b6a4e9
RA
2999 invideo_url = try_get(
3000 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
3001 if xsrf_token and invideo_url:
29f7c58a 3002 xsrf_field_name = None
3003 if ytcfg:
3004 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3005 if not xsrf_field_name:
3006 xsrf_field_name = self._search_regex(
3007 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 3008 webpage, 'xsrf field name',
29f7c58a 3009 group='xsrf_field_name', default='session_token')
8a784c74 3010 info['annotations'] = self._download_webpage(
64b6a4e9
RA
3011 self._proto_relative_url(invideo_url),
3012 video_id, note='Downloading annotations',
3013 errnote='Unable to download video annotations', fatal=False,
3014 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 3015
277d6ff5 3016 if get_comments:
2d6659b9 3017 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)
4ea3be0a 3018
545cc85d 3019 self.mark_watched(video_id, player_response)
d77ab8e2 3020
545cc85d 3021 return info
c5e8d7af 3022
5f6a1245 3023
8bdd16b4 3024class YoutubeTabIE(YoutubeBaseInfoExtractor):
3025 IE_DESC = 'YouTube.com tab'
70d5c17b 3026 _VALID_URL = r'''(?x)
3027 https?://
3028 (?:\w+\.)?
3029 (?:
3030 youtube(?:kids)?\.com|
3031 invidio\.us
3032 )/
3033 (?:
fe03a6cd 3034 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3035 (?P<not_channel>
9ba5705a 3036 feed/|hashtag/|
70d5c17b 3037 (?:playlist|watch)\?.*?\blist=
3038 )|
29f7c58a 3039 (?!(?:%s)\b) # Direct URLs
70d5c17b 3040 )
3041 (?P<id>[^/?\#&]+)
3042 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3043 IE_NAME = 'youtube:tab'
3044
81127aa5 3045 _TESTS = [{
da692b79 3046 'note': 'playlists, multipage',
8bdd16b4 3047 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3048 'playlist_mincount': 94,
3049 'info_dict': {
3050 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3051 'title': 'Игорь Клейнер - Playlists',
3052 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3053 'uploader': 'Игорь Клейнер',
3054 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3055 },
3056 }, {
da692b79 3057 'note': 'playlists, multipage, different order',
8bdd16b4 3058 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3059 'playlist_mincount': 94,
3060 'info_dict': {
3061 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3062 'title': 'Игорь Клейнер - Playlists',
3063 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3064 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3065 'uploader': 'Игорь Клейнер',
8bdd16b4 3066 },
201c1459 3067 }, {
da692b79 3068 'note': 'playlists, series',
201c1459 3069 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3070 'playlist_mincount': 5,
3071 'info_dict': {
3072 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3073 'title': '3Blue1Brown - Playlists',
3074 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3075 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3076 'uploader': '3Blue1Brown',
201c1459 3077 },
8bdd16b4 3078 }, {
da692b79 3079 'note': 'playlists, singlepage',
8bdd16b4 3080 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3081 'playlist_mincount': 4,
3082 'info_dict': {
3083 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3084 'title': 'ThirstForScience - Playlists',
3085 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3086 'uploader': 'ThirstForScience',
3087 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3088 }
3089 }, {
3090 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3091 'only_matching': True,
3092 }, {
da692b79 3093 'note': 'basic, single video playlist',
0e30a7b9 3094 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3095 'info_dict': {
0e30a7b9 3096 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3097 'uploader': 'Sergey M.',
3098 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3099 'title': 'youtube-dl public playlist',
81127aa5 3100 },
0e30a7b9 3101 'playlist_count': 1,
9291475f 3102 }, {
da692b79 3103 'note': 'empty playlist',
0e30a7b9 3104 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3105 'info_dict': {
0e30a7b9 3106 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3107 'uploader': 'Sergey M.',
3108 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3109 'title': 'youtube-dl empty playlist',
9291475f
PH
3110 },
3111 'playlist_count': 0,
3112 }, {
da692b79 3113 'note': 'Home tab',
8bdd16b4 3114 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3115 'info_dict': {
8bdd16b4 3116 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3117 'title': 'lex will - Home',
3118 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3119 'uploader': 'lex will',
3120 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3121 },
8bdd16b4 3122 'playlist_mincount': 2,
9291475f 3123 }, {
da692b79 3124 'note': 'Videos tab',
8bdd16b4 3125 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3126 'info_dict': {
8bdd16b4 3127 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3128 'title': 'lex will - Videos',
3129 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3130 'uploader': 'lex will',
3131 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3132 },
8bdd16b4 3133 'playlist_mincount': 975,
9291475f 3134 }, {
da692b79 3135 'note': 'Videos tab, sorted by popular',
8bdd16b4 3136 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3137 'info_dict': {
8bdd16b4 3138 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3139 'title': 'lex will - Videos',
3140 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3141 'uploader': 'lex will',
3142 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3143 },
8bdd16b4 3144 'playlist_mincount': 199,
9291475f 3145 }, {
da692b79 3146 'note': 'Playlists tab',
8bdd16b4 3147 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3148 'info_dict': {
8bdd16b4 3149 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3150 'title': 'lex will - Playlists',
3151 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3152 'uploader': 'lex will',
3153 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3154 },
8bdd16b4 3155 'playlist_mincount': 17,
ac7553d0 3156 }, {
da692b79 3157 'note': 'Community tab',
8bdd16b4 3158 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3159 'info_dict': {
8bdd16b4 3160 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3161 'title': 'lex will - Community',
3162 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3163 'uploader': 'lex will',
3164 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3165 },
3166 'playlist_mincount': 18,
87dadd45 3167 }, {
da692b79 3168 'note': 'Channels tab',
8bdd16b4 3169 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3170 'info_dict': {
8bdd16b4 3171 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3172 'title': 'lex will - Channels',
3173 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3174 'uploader': 'lex will',
3175 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3176 },
deaec5af 3177 'playlist_mincount': 12,
cd684175 3178 }, {
3179 'note': 'Search tab',
3180 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3181 'playlist_mincount': 40,
3182 'info_dict': {
3183 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3184 'title': '3Blue1Brown - Search - linear algebra',
3185 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3186 'uploader': '3Blue1Brown',
3187 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3188 },
6b08cdf6 3189 }, {
a0566bbf 3190 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3191 'only_matching': True,
3192 }, {
a0566bbf 3193 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3194 'only_matching': True,
3195 }, {
a0566bbf 3196 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3197 'only_matching': True,
3198 }, {
3199 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3200 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3201 'info_dict': {
3202 'title': '29C3: Not my department',
3203 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3204 'uploader': 'Christiaan008',
3205 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3206 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3207 },
3208 'playlist_count': 96,
3209 }, {
3210 'note': 'Large playlist',
3211 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3212 'info_dict': {
8bdd16b4 3213 'title': 'Uploads from Cauchemar',
3214 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3215 'uploader': 'Cauchemar',
3216 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3217 },
8bdd16b4 3218 'playlist_mincount': 1123,
3219 }, {
da692b79 3220 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3221 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3222 'only_matching': True,
4b7df0d3
JMF
3223 }, {
3224 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3225 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3226 'info_dict': {
acf757f4
PH
3227 'title': 'Uploads from Interstellar Movie',
3228 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3229 'uploader': 'Interstellar Movie',
8bdd16b4 3230 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3231 },
481cc733 3232 'playlist_mincount': 21,
358de58c 3233 }, {
3234 'note': 'Playlist with "show unavailable videos" button',
3235 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3236 'info_dict': {
3237 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3238 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3239 'uploader': 'Phim Siêu Nhân Nhật Bản',
3240 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3241 },
da692b79 3242 'playlist_mincount': 200,
5d342002 3243 }, {
da692b79 3244 'note': 'Playlist with unavailable videos in page 7',
5d342002 3245 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3246 'info_dict': {
3247 'title': 'Uploads from BlankTV',
3248 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3249 'uploader': 'BlankTV',
3250 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3251 },
da692b79 3252 'playlist_mincount': 1000,
8bdd16b4 3253 }, {
da692b79 3254 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3255 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3256 'info_dict': {
3257 'title': 'Data Analysis with Dr Mike Pound',
3258 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3259 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3260 'uploader': 'Computerphile',
deaec5af 3261 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3262 },
3263 'playlist_mincount': 11,
3264 }, {
a0566bbf 3265 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3266 'only_matching': True,
dacb3a86 3267 }, {
da692b79 3268 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3269 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3270 'info_dict': {
3271 'id': 'FqZTN594JQw',
3272 'ext': 'webm',
3273 'title': "Smiley's People 01 detective, Adventure Series, Action",
3274 'uploader': 'STREEM',
3275 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3276 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3277 'upload_date': '20150526',
3278 'license': 'Standard YouTube License',
3279 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3280 'categories': ['People & Blogs'],
3281 'tags': list,
dbdaaa23 3282 'view_count': int,
dacb3a86
S
3283 'like_count': int,
3284 'dislike_count': int,
3285 },
3286 'params': {
3287 'skip_download': True,
3288 },
13a75688 3289 'skip': 'This video is not available.',
dacb3a86 3290 'add_ie': [YoutubeIE.ie_key()],
481cc733 3291 }, {
8bdd16b4 3292 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3293 'only_matching': True,
66b48727 3294 }, {
8bdd16b4 3295 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3296 'only_matching': True,
a0566bbf 3297 }, {
3298 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3299 'info_dict': {
da692b79 3300 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 3301 'ext': 'mp4',
deaec5af 3302 'title': compat_str,
a0566bbf 3303 'uploader': 'Sky News',
3304 'uploader_id': 'skynews',
3305 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3306 'upload_date': r're:\d{8}',
3307 'description': compat_str,
a0566bbf 3308 'categories': ['News & Politics'],
3309 'tags': list,
3310 'like_count': int,
3311 'dislike_count': int,
3312 },
3313 'params': {
3314 'skip_download': True,
3315 },
da692b79 3316 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3317 }, {
3318 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3319 'info_dict': {
3320 'id': 'a48o2S1cPoo',
3321 'ext': 'mp4',
3322 'title': 'The Young Turks - Live Main Show',
3323 'uploader': 'The Young Turks',
3324 'uploader_id': 'TheYoungTurks',
3325 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3326 'upload_date': '20150715',
3327 'license': 'Standard YouTube License',
3328 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3329 'categories': ['News & Politics'],
3330 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3331 'like_count': int,
3332 'dislike_count': int,
3333 },
3334 'params': {
3335 'skip_download': True,
3336 },
3337 'only_matching': True,
3338 }, {
3339 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3340 'only_matching': True,
3341 }, {
3342 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3343 'only_matching': True,
09f1580e 3344 }, {
3345 'note': 'A channel that is not live. Should raise error',
3346 'url': 'https://www.youtube.com/user/numberphile/live',
3347 'only_matching': True,
3d3dddc9 3348 }, {
3349 'url': 'https://www.youtube.com/feed/trending',
3350 'only_matching': True,
3351 }, {
3d3dddc9 3352 'url': 'https://www.youtube.com/feed/library',
3353 'only_matching': True,
3354 }, {
3d3dddc9 3355 'url': 'https://www.youtube.com/feed/history',
3356 'only_matching': True,
3357 }, {
3d3dddc9 3358 'url': 'https://www.youtube.com/feed/subscriptions',
3359 'only_matching': True,
3360 }, {
3d3dddc9 3361 'url': 'https://www.youtube.com/feed/watch_later',
3362 'only_matching': True,
3363 }, {
da692b79 3364 'note': 'Recommended - redirects to home page',
3d3dddc9 3365 'url': 'https://www.youtube.com/feed/recommended',
3366 'only_matching': True,
29f7c58a 3367 }, {
da692b79 3368 'note': 'inline playlist with not always working continuations',
29f7c58a 3369 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3370 'only_matching': True,
3371 }, {
3372 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3373 'only_matching': True,
3374 }, {
3375 'url': 'https://www.youtube.com/course',
3376 'only_matching': True,
3377 }, {
3378 'url': 'https://www.youtube.com/zsecurity',
3379 'only_matching': True,
3380 }, {
3381 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3382 'only_matching': True,
3383 }, {
3384 'url': 'https://www.youtube.com/TheYoungTurks/live',
3385 'only_matching': True,
39ed931e 3386 }, {
3387 'url': 'https://www.youtube.com/hashtag/cctv9',
3388 'info_dict': {
3389 'id': 'cctv9',
3390 'title': '#cctv9',
3391 },
3392 'playlist_mincount': 350,
201c1459 3393 }, {
3394 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3395 'only_matching': True,
9297939e 3396 }, {
da692b79 3397 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3398 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3399 'only_matching': True
fe03a6cd 3400 }, {
3401 'note': '/browse/ should redirect to /channel/',
3402 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3403 'only_matching': True
3404 }, {
3405 'note': 'VLPL, should redirect to playlist?list=PL...',
3406 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3407 'info_dict': {
3408 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3409 'uploader': 'NoCopyrightSounds',
3410 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3411 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3412 'title': 'NCS Releases',
3413 },
3414 'playlist_mincount': 166,
18db7548 3415 }, {
3416 'note': 'Topic, should redirect to playlist?list=UU...',
3417 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3418 'info_dict': {
3419 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3420 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3421 'title': 'Uploads from Royalty Free Music - Topic',
3422 'uploader': 'Royalty Free Music - Topic',
3423 },
3424 'expected_warnings': [
3425 'A channel/user page was given',
3426 'The URL does not have a videos tab',
3427 ],
3428 'playlist_mincount': 101,
3429 }, {
3430 'note': 'Topic without a UU playlist',
3431 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3432 'info_dict': {
3433 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3434 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3435 },
3436 'expected_warnings': [
3437 'A channel/user page was given',
3438 'The URL does not have a videos tab',
3439 'Falling back to channel URL',
3440 ],
3441 'playlist_mincount': 9,
abcdd12b 3442 }, {
3443 'note': 'Youtube music Album',
3444 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3445 'info_dict': {
3446 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3447 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3448 },
3449 'playlist_count': 50,
29f7c58a 3450 }]
3451
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs accepted by YoutubeIE are always declined here; for anything
    else the decision is left to the inherited ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3456
3457 def _extract_channel_id(self, webpage):
3458 channel_id = self._html_search_meta(
3459 'channelId', webpage, 'channel id', default=None)
3460 if channel_id:
3461 return channel_id
3462 channel_url = self._html_search_meta(
3463 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3464 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3465 'twitter:app:url:googleplay'), webpage, 'channel url')
3466 return self._search_regex(
3467 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3468 channel_url, 'channel id')
15f6397c 3469
8bdd16b4 3470 @staticmethod
cd7c66cf 3471 def _extract_basic_item_renderer(item):
3472 # Modified from _extract_grid_item_renderer
201c1459 3473 known_basic_renderers = (
3474 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3475 )
3476 for key, renderer in item.items():
201c1459 3477 if not isinstance(renderer, dict):
cd7c66cf 3478 continue
201c1459 3479 elif key in known_basic_renderers:
3480 return renderer
3481 elif key.startswith('grid') and key.endswith('Renderer'):
3482 return renderer
8bdd16b4 3483
def _grid_entries(self, grid_renderer):
    """Yield one entry per usable item of ``grid_renderer['items']``.

    Each item is classified, in this order, as a playlist, a video, a
    channel, or a generic navigation endpoint whose URL one of the
    YouTube extractors accepts. Items matching none of these are
    dropped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue
        item_title = try_get(
            item_renderer, (lambda x: x['title']['runs'][0]['text'],
                            lambda x: x['title']['simpleText']), compat_str)

        # playlist
        list_id = item_renderer.get('playlistId')
        if list_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % list_id,
                ie=YoutubeTabIE.ie_key(), video_id=list_id,
                video_title=item_title)
            continue

        # video
        if item_renderer.get('videoId'):
            yield self._extract_video(item_renderer)
            continue

        # channel
        chan_id = item_renderer.get('channelId')
        if chan_id:
            # Channel items only use the plain-text form of the title.
            item_title = try_get(
                item_renderer, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % chan_id,
                ie=YoutubeTabIE.ie_key(), video_title=item_title)
            continue

        # generic endpoint URL support
        endpoint_path = try_get(
            item_renderer,
            lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str)
        endpoint_url = urljoin('https://www.youtube.com/', endpoint_path)
        if not endpoint_url:
            continue
        # First extractor (in priority order) that accepts the URL wins.
        handler = next(
            (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE)
             if ie.suitable(endpoint_url)), None)
        if handler is not None:
            yield self.url_result(
                endpoint_url, ie=handler.ie_key(),
                video_id=handler._match_id(endpoint_url), video_title=item_title)
8bdd16b4 3526
3d3dddc9 3527 def _shelf_entries_from_content(self, shelf_renderer):
3528 content = shelf_renderer.get('content')
3529 if not isinstance(content, dict):
8bdd16b4 3530 return
cd7c66cf 3531 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3532 if renderer:
3533 # TODO: add support for nested playlists so each shelf is processed
3534 # as separate playlist
3535 # TODO: this includes only first N items
3536 for entry in self._grid_entries(renderer):
3537 yield entry
3538 renderer = content.get('horizontalListRenderer')
3539 if renderer:
3540 # TODO
3541 pass
8bdd16b4 3542
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf.

    When the shelf exposes an endpoint URL, a URL result for it is
    yielded first (unless *skip_channels* is set and the URL points at a
    ``/channels?`` listing, in which case nothing at all is yielded).
    Entries found in the shelf's own content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 3560
8bdd16b4 3561 def _playlist_entries(self, video_list_renderer):
3562 for content in video_list_renderer['contents']:
3563 if not isinstance(content, dict):
3564 continue
3565 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3566 if not isinstance(renderer, dict):
3567 continue
3568 video_id = renderer.get('videoId')
3569 if not video_id:
3570 continue
3571 yield self._extract_video(renderer)
07aeced6 3572
def _rich_entries(self, rich_grid_renderer):
    """Yield the single video held at ``content.videoRenderer``, if any.

    Nothing is yielded when the renderer is missing or has no ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3580
8bdd16b4 3581 def _video_entry(self, video_renderer):
3582 video_id = video_renderer.get('videoId')
3583 if video_id:
3584 return self._extract_video(video_renderer)
dacb3a86 3585
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video (if any), the attached playlist (if
    any), then every YouTube video linked inline in the post text,
    skipping a link that points at the already-attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        linked_id = YoutubeIE._match_id(link_url)
        if linked_id != video_id:
            yield self.url_result(link_url, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3621
8bdd16b4 3622 def _post_thread_continuation_entries(self, post_thread_continuation):
3623 contents = post_thread_continuation.get('contents')
3624 if not isinstance(contents, list):
3625 return
3626 for content in contents:
3627 renderer = content.get('backstagePostThreadRenderer')
3628 if not isinstance(renderer, dict):
3629 continue
3630 for entry in self._post_thread_entries(renderer):
3631 yield entry
07aeced6 3632
39ed931e 3633 r''' # unused
3634 def _rich_grid_entries(self, contents):
3635 for content in contents:
3636 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3637 if video_renderer:
3638 entry = self._video_entry(video_renderer)
3639 if entry:
3640 yield entry
3641 '''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of *tab*, following continuations page by page.

    The entries embedded in ``tab['content']`` are yielded first. Then,
    for as long as a continuation is known, the next page is requested
    through ``_extract_response`` and its entries are yielded. Paging
    stops when there is no continuation, the response is empty, or the
    response holds none of the recognised renderers.

    *item_id* is only used to label the page requests. *identity_token*,
    *account_syncid* and *ytcfg* are forwarded to the header/request
    helpers.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Walks parent_renderer['contents'] and, as a side effect, stores
        # the continuation it finds in continuation_list[0].
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # Not an item section: only rich items are handled here.
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Renderer key -> generator producing its entries.
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first recognised key of each content is used.
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            # Fall back to the item section's own continuation.
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # Last resort: the continuation of the parent renderer itself.
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list used as a mutable cell shared with extract_entries.
    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {
            'continuation': continuation['continuation'],
            'clickTracking': {'clickTrackingParams': continuation['itct']}
        }
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Prefer the visitor data returned by the latest response.
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response shape: entries under 'continuationContents'.
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset the shared cell so a stale continuation is not reused.
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: items appended through
        # 'appendContinuationItemsAction'. Each value pairs the handler
        # with the key under which it expects the item list.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The first item decides which handler processes the whole list.
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the item list so it looks like the renderer the handler expects.
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognisable in this response: stop paging.
        break
9558dcec 3761
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose ``selected`` is True.

    Both ``tabRenderer`` and ``expandableTabRenderer`` are considered.
    Raises ExtractorError when no tab is selected.
    """
    tab_renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next(
        (candidate for candidate in tab_renderers
         if candidate.get('selected') is True), None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected
b82f815f 3770
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar of *data*.

    Returns a dict holding whichever of ``uploader``, ``uploader_id``
    and ``uploader_url`` could be determined; keys whose value is None
    are left out.
    """
    found = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or []:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner_run = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner_run:
            continue
        found['uploader'] = owner_run.get('text')
        found['uploader_id'] = try_get(
            owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        base_path = try_get(
            owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        found['uploader_url'] = urljoin('https://www.youtube.com/', base_path)
    return {field: value for field, value in found.items() if value is not None}
8bdd16b4 3793
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a page made of tabs.

    Metadata comes from ``channelMetadataRenderer`` when present,
    otherwise from ``playlistMetadataRenderer``. Uploader details fall
    back to the playlist sidebar when no channel ID is known. The title
    is suffixed with the selected tab's ``title`` and ``expandedText``.
    Entries are produced lazily by ``_entries`` for the selected tab.
    """
    channel_name = channel_url = channel_id = None
    title = description = playlist_id = None
    tags = []
    raw_thumbnails = []

    selected_tab = self._extract_selected_tab(tabs)

    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()
        # Channel avatar first, then the playlist's sidebar thumbnail.
        raw_thumbnails = (
            try_get(metadata_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly updated) uploader_* fields.
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(data)
    ytcfg = self._extract_ytcfg(item_id, webpage)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3866
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages as needed.

    Each page is *playlist* (initially the one passed in, afterwards the
    one returned by the ``next`` endpoint). Videos up to and including
    the last one already yielded are skipped on every new page.
    Iteration ends when a page is empty or missing, brings nothing new,
    or loops back to the very first video.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page.
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last content item is not guaranteed to be a panel video
        # renderer; fall back to an empty dict so the defaults below apply
        # instead of failing on None.
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        # Without a playlist in the response there is nothing left to
        # page through; stop instead of passing None to _playlist_entries.
        if not playlist:
            return
cd7c66cf 3905
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """
    Build the result for a watch page that carries a playlist panel.

    When the playlist renderer points to a URL that differs from the one
    being extracted, the work is handed over to YoutubeTabIE through a
    url_result. Otherwise the playlist is treated as a Mix and its entries
    come from _extract_mix_playlist.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if playlist_url and playlist_url != url:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(),
            video_id=playlist_id, video_title=title)

    mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
    return self.playlist_result(
        mix_entries, playlist_id=playlist_id, playlist_title=title)
c5e8d7af 3923
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again in its 'show unavailable videos' form.

    Returns None without any request when the initial data has no playlist
    sidebar items. Otherwise the sidebar menus are searched for the
    'show unavailable videos' entry and its browseId/params are used for
    the request; if no such entry is found, 'VL<item_id>' and a fixed
    params value are sent instead. The request is non-fatal.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return None

    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        menu_items = try_get(
            sidebar_item.get('playlistSidebarPrimaryInfoRenderer'),
            lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if not isinstance(menu_item, dict):
                continue
            nav_renderer = menu_item.get('menuNavigationItemRenderer')
            label = try_get(
                nav_renderer, lambda x: x['text']['simpleText'], compat_str)
            if label and label.lower() == 'show unavailable videos':
                endpoint = try_get(
                    nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
                browse_id, params = endpoint.get('browseId'), endpoint.get('params')
                # Only leaves the menu loop; remaining sidebar items are still scanned
                break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=visitor_data)
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3967
def _extract_webpage(self, url, item_id):
    """
    Download the webpage and its initial data, retrying on incomplete data.

    The page is fetched up to 'extractor_retries' (default 3) additional
    times until the initial data contains 'contents' or
    'currentVideoEndpoint'. Alerts are reported after every failed attempt,
    and ExtractorError is raised once the retries are used up.

    Returns a (webpage, data) tuple.
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self._extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
3989
@staticmethod
def _smuggle_data(entries, data):
    """
    Lazily re-yield every entry, smuggling `data` into its 'url'.

    Entries are passed through untouched while `data` is falsy.
    """
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
3996
def _real_extract(self, url):
    """
    Entry point: unpack smuggled data, extract, then re-smuggle into entries.

    The smuggled data (plus an 'is_music_url' flag for music URLs) is handed
    to __real_extract, and afterwards attached to the URL of every entry of
    the result, if the result has entries.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True

    info_dict = self.__real_extract(url, smuggled_data)
    entries = info_dict.get('entries')
    if entries:
        info_dict['entries'] = self._smuggle_data(entries, smuggled_data)
    return info_dict
4005
# Splits a URL into three named parts: "pre" (whatever _VALID_URL matches),
# "tab" (a following "/word" component, attempted only when the
# channel_type group of _VALID_URL took part in the match) and "post"
# (the rest of the URL).
_url_re = re.compile(
    r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4007
def __real_extract(self, url, smuggled_data):
    """
    Extract a tab, playlist or single video from a (normalised) YouTube URL.

    The URL is first rewritten (host forced to www.youtube.com, music
    channel ids mapped to playlists, bare channel URLs redirected to
    /videos unless the 'no-youtube-channel-redirect' compat option is set),
    then the webpage is downloaded and the result is built from whichever
    of tabs, a watch-page playlist or a single video the data contains.

    Raises ExtractorError when none of these can be recognised.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Named groups of _url_re, with unmatched groups turned into ''
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Re-assemble the (possibly rewritten) URL and re-parse it so that mobj
    # reflects the final tab
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    # First look at the tabs only to validate/redirect the requested tab;
    # the actual extraction from tabs happens further below
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Only switch over once the playlist page loaded without an error alert
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        # Keep the data we already have when the reload returns nothing
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    # data may have been replaced above, so the tabs are looked up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Neither tabs nor playlist: fall back to a single video, preferring the
    # id found in the data over the one taken from the query string
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4123
c5e8d7af 4124
class YoutubePlaylistIE(InfoExtractor):
    """Matches playlist ids/URLs and forwards them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a video id ('v')."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return not has_video_id and super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the input to a /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be decided on the original URL, before it is rewritten below
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one, else pass just the list id
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4207
4208
class YoutubeYtBeIE(InfoExtractor):
    """Turns youtu.be short links that carry a playlist id into watch URLs."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch?v=...&list=... URL and hand it to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4247
4248
class YoutubeYtUserIE(InfoExtractor):
    """Expands the "ytuser:<name>" shorthand to the /user/<name> URL."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the user's channel URL to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4262
b05654f0 4263
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the ":ytfav" family of keywords to the 'LL' playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the fixed 'LL' playlist URL to YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4281
4282
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor behind the "ytsearch" keyword."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """
        Yield up to `n` video results for `query`, following continuations.

        Paging stops when a response is empty, has no result sections, or
        provides no continuation token.
        """
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        for page_num in itertools.count(1):
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # The first page and continuation pages keep the sections in different places
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for section in sections:
                if continuation_token is None:
                    continuation_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list) or []
                for item in items:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    # Skip anything that is not a video renderer carrying an id
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation_token:
                break
            request['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 4352
c9ae7b95 4353
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that sends a fixed 'params' value with the search."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' field of the search request
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4359
c9ae7b95 4360
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Handles /results?search_query=... (or ?q=...) URLs."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own _VALID_URL unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by the URL's query string."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # The 'sp' value of the URL (empty if absent) becomes the search params
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4386
4387
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    Each subclass has to provide the _FEED_NAME property, which is used both
    for IE_NAME and for the /feed/<name> URL.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Delegate the feed URL to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4404
4405
class YoutubeWatchLaterIE(InfoExtractor):
    """Maps the ":ytwatchlater" keyword to the 'WL' playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the fixed 'WL' playlist URL to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4418
4419
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'recommended'; also matches the bare youtube.com URL."""

    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    # Overrides the True inherited from YoutubeFeedsInfoExtractor
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4435
1ed5b5c9 4436
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'subscriptions', reached through the ":ytsubs" keywords."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4448
1ed5b5c9 4449
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'history', reached through the ":ythis" keywords."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4458
4459
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catches watch URLs that lost their video id and explains the likely cause.

    Matches watch URLs that end after a single non-video parameter (or none
    at all) and attribution links; extraction always raises an expected
    ExtractorError telling the user to quote the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The hint names this project's own executable (yt-dlp) so that the
        # suggested command can be run as written
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4507
4508
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose video id is shorter than 11 characters."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with an expected error naming the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)