]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[cleanup] Fix linter in 96fccc101f8f579ebd67da176e029803d82634c7
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
2d30521a 37 float_or_none,
11f9be09 38 format_field,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
94278f72 41 mimetype2ext,
9c0d7f49 42 network_exceptions,
11f9be09 43 orderedSet,
6310acf5 44 parse_codecs,
49bd8c66 45 parse_count,
7c80519c 46 parse_duration,
7ea65411 47 parse_iso8601,
dca3ff4a 48 qualities,
3995d37d 49 remove_start,
cf7e015f 50 smuggle_url,
dbdaaa23 51 str_or_none,
c93d53f5 52 str_to_int,
7c365c21 53 traverse_obj,
556dbe7f 54 try_get,
c5e8d7af
PH
55 unescapeHTML,
56 unified_strdate,
cf7e015f 57 unsmuggle_url,
8bdd16b4 58 update_url_query,
21c340b8 59 url_or_none,
6e6bc8da 60 urlencode_postdata,
fe93e2c4 61 urljoin,
7c365c21 62 variadic,
c5e8d7af
PH
63)
64
5f6a1245 65
def parse_qs(url):
    """Parse the query component of *url* with compat_urlparse.parse_qs."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
68
69
# Any client whose name starts with _ cannot be explicitly requested by the user.
# Each entry maps a client name to its default ytcfg; build_innertube_clients()
# fills in the remaining defaults after this literal is defined.
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
}
180
181
def build_innertube_clients():
    """Fill in defaults for every INNERTUBE_CLIENTS entry and add agegate variants.

    For each client present when the function starts:
      * default API key, host and 'hl' language are set if missing;
      * 'priority' is set to 10 times the rank that `qualities` assigns to the
        client's family (the part of the name before the first '_'), then
        lowered by 1 (agegate copy), 2 ('_embedded') or 3 (anything else
        that is not a base client).
    Every base client additionally gets a deep-copied '<name>_agegate' entry
    with an EMBED client screen and a thirdParty embed URL.
    """
    embed_context = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])

    # Iterate over a snapshot: agegate entries are inserted while looping
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        cfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        cfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        context = cfg['INNERTUBE_CONTEXT']
        context['client'].setdefault('hl', 'en')
        family = name.split('_', 1)[0]
        cfg['priority'] = 10 * rank(family)

        if name in base_clients:
            agegate_cfg = copy.deepcopy(cfg)
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_cfg
            agegate_context = agegate_cfg['INNERTUBE_CONTEXT']
            agegate_context['client']['clientScreen'] = 'EMBED'
            agegate_context['thirdParty'] = embed_context
            agegate_cfg['priority'] -= 1
            continue

        if name.endswith('_embedded'):
            context['thirdParty'] = embed_context
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3


build_innertube_clients()
208
209
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Alternation of path names, written as one regex fragment
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Regex fragment for playlist IDs: a known prefix followed by 10+ ID
    # characters, or one of the fixed names RDMM, WL, LL, LM
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    r'''  # Unused since login is broken
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
    '''
d0ba5587 234
def _login(self):
    """
    Attempt to log in to YouTube.

    Logging in with username and password is broken, so no login request is
    ever sent; authentication has to be supplied through cookies instead.

    True is returned if login was skipped (no username was supplied).
    False is returned if a username was supplied, since it cannot be used.

    If _LOGIN_REQUIRED is set and neither the 'cookiefile' nor the
    'cookiesfrombrowser' param is given, raise_login_required is called.
    """
    cookies_missing = (
        self.get_param('cookiefile') is None
        and self.get_param('cookiesfrombrowser') is None)
    if self._LOGIN_REQUIRED and cookies_missing:
        self.raise_login_required(
            'Login details are needed to download this content', method='cookies')

    username, _ = self._get_login_info()
    if username:
        # Credentials were given but cannot be honoured; point the user to cookies
        self.report_warning(
            'Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
        return False
    return True
b2e8bc1b 433
def _initialize_consent(self):
    """Set a 'YES' CONSENT cookie for .youtube.com unless one is not needed.

    Nothing is done when a __Secure-3PSID cookie exists or when the current
    CONSENT cookie already contains 'YES'. The numeric suffix is taken from a
    'PENDING+<digits>' CONSENT value when available, otherwise it is a random
    integer between 100 and 999.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 448
def _real_initialize(self):
    """Initialize the consent cookie, then call _login if a downloader is set."""
    self._initialize_consent()
    if self._downloader is not None:
        # The result of _login is not acted upon here
        self._login()
c5e8d7af 455
a0566bbf 456 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 457 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
458 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 459
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for *client*."""
    default_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(default_cfg)
109dd3b2 462
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value of the INNERTUBE_CLIENTS entry for *client*."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']
109dd3b2 465
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get on *ytcfg*, falling back to the default ytcfg of *default_client*.

    The fallback is used whenever the value obtained from *ytcfg* is falsy.
    """
    own_value = try_get(ytcfg, getter, expected_type)
    if own_value:
        return own_value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
470
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from *ytcfg*, or from the default client config.

    'INNERTUBE_CLIENT_NAME' is tried first, then the clientName inside
    'INNERTUBE_CONTEXT'.
    """
    getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
109dd3b2 475
@staticmethod
def _extract_session_index(*data):
    """Return the first 'SESSION_INDEX' in *data* that int_or_none accepts.

    Each positional argument is looked up in order; None is returned when no
    argument yields a value.
    """
    candidates = (
        int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX']))
        for cfg in data)
    return next((index for index in candidates if index is not None), None)
314ee305 482
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from *ytcfg*, or from the default client config.

    'INNERTUBE_CLIENT_VERSION' is tried first, then the clientVersion inside
    'INNERTUBE_CONTEXT'.
    """
    getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
109dd3b2 487
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from *ytcfg*, or from the default client config."""
    def api_key_of(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_of, compat_str, default_client)
490
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the innertube context dict to send with API requests.

    The 'INNERTUBE_CONTEXT' of *ytcfg* is returned as-is when present.
    Otherwise the default context of *default_client* is used; if *ytcfg* is
    non-empty, its client name, client version and (when available)
    'VISITOR_DATA' are written into that default context first.
    """
    def context_of(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    own_context = context_of(ytcfg)
    if own_context:
        return own_context

    fallback = context_of(self._get_default_ytcfg(default_client))
    if ytcfg:
        # Recreate the client context (required)
        client_version = self._extract_client_version(ytcfg, default_client)
        client_name = self._extract_client_name(ytcfg, default_client)
        client = fallback['client']
        client['clientVersion'] = client_version
        client['clientName'] = client_name

        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            client['visitorData'] = visitor_data
    return fallback
510
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the 'SAPISIDHASH <time>_<sha1>' authorization value for *origin*.

    Returns None when neither a __Secure-3PAPISID nor a SAPISID cookie with a
    value is available. If the SAPISID cookie is missing, it is set from the
    cookie that was found, expiring in one hour.
    """
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    jar = self._get_cookies('https://www.youtube.com')
    source_cookie = dict_get(jar, ('__Secure-3PAPISID', 'SAPISID'))
    if source_cookie is None or not source_cookie.value:
        return

    timestamp = round(time.time())
    # SAPISID cookie is required if not already present
    if not jar.get('SAPISID'):
        self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie', only_once=True)
        self._set_cookie(
            '.youtube.com', 'SAPISID', source_cookie.value,
            secure=True, expire_time=timestamp + 3600)
    self.write_debug('Extracted SAPISID cookie', only_once=True)

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{timestamp} {source_cookie.value} {origin}'.encode('utf-8')
    digest = hashlib.sha1(payload).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'
a5c56234
M
530
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST *query* to the innertube endpoint *ep* and return the parsed JSON.

    @param ep              endpoint name appended to /youtubei/v1/
    @param query           dict merged into the request body after 'context'
    @param video_id        id passed on to _download_json
    @param headers         extra headers; override the generated ones
    @param context         innertube context; defaults to that of *default_client*
    @param api_key         API key; defaults to that of *default_client*
    @param api_hostname    host to call; defaults to that of *default_client*
    @param default_client  INNERTUBE_CLIENTS name used for all the defaults above
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)

    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)

    # The fallback key must come from the same client as the context and host;
    # otherwise a non-web default client would be sent the 'web' key
    api_key = api_key or self._extract_api_key(default_client=default_client)
    return self._download_json(
        'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        query={'key': api_key})
546
def extract_yt_initial_data(self, video_id, webpage):
    """Find the ytInitialData assignment in *webpage* and return it parsed as JSON.

    The pattern anchored by _YT_INITIAL_BOUNDARY_RE is tried before the bare
    _YT_INITIAL_DATA_RE.
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE,
    )
    raw_json = self._search_regex(patterns, webpage, 'yt initial data')
    return self._parse_json(raw_json, video_id)
0c148415 553
def _extract_identity_token(self, webpage, item_id):
    """Return the ID_TOKEN found in *webpage*, or None.

    The token is read from the page's ytcfg when possible; otherwise the raw
    page is searched with a regex.
    """
    if not webpage:
        return None

    ytcfg = self.extract_ytcfg(item_id, webpage)
    cfg_token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if cfg_token:
        return cfg_token

    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
565
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the first DELEGATED_SESSION_ID or channel sync id found, else None
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid

        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split("||")
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
a1c5d2ca 584
def extract_ytcfg(self, video_id, webpage):
    """Return the object passed to `ytcfg.set(...)` in *webpage* as a dict.

    An empty dict is returned when *webpage* is falsy, when no ytcfg.set call
    is found, or when the parsed result is falsy.
    """
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed_cfg = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed_cfg or {}
592
def generate_api_headers(
        self, ytcfg=None, identity_token=None, account_syncid=None,
        visitor_data=None, api_hostname=None, default_client='web', session_index=None):
    """Build the HTTP headers for an innertube API request.

    Client name, client version and Origin are always present. The identity
    token, page id, auth-user index, visitor id and SAPISIDHASH authorization
    headers are added only when the corresponding value is given or can be
    derived from *ytcfg* / the cookies.
    """
    host = api_hostname if api_hostname else self._get_innertube_host(default_client)
    origin = 'https://' + host

    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
    }

    if ytcfg and not visitor_data:
        visitor_data = try_get(
            self._extract_context(ytcfg, default_client),
            lambda x: x['client']['visitorData'], compat_str)

    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid

    if ytcfg and session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Index 0 is sent when a sync id is given without a session index
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return headers
29f7c58a 621
2d6659b9 622 @staticmethod
623 def _build_api_continuation_query(continuation, ctp=None):
624 query = {
625 'continuation': continuation
626 }
627 # TODO: Inconsistency with clickTrackingParams.
628 # Currently we have a fixed ctp contained within context (from ytcfg)
629 # and a ctp in root query for continuation.
630 if ctp:
631 query['clickTracking'] = {'clickTrackingParams': ctp}
632 return query
633
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from *renderer*'s next/reload continuation data.

    Returns None when no such data, or no 'continuation' token in it, is found.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
2d6659b9 646
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when *continuation_ep* is not a dict or carries no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if token:
        return cls._build_api_continuation_query(
            token, continuation_ep.get('clickTrackingParams'))
2d6659b9 656
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    The renderer's own next/reload continuation data is preferred; otherwise
    the first continuationItemRenderer among its 'contents' and 'items' that
    yields a continuation is used.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in (try_get(renderer, lambda x, k=key: x[k], list) or [])
        if isinstance(entry, dict)
    ]
    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        endpoint = try_get(entry, endpoint_getters, dict)
        found = cls._extract_continuation_ep_data(endpoint)
        if found:
            return found
677
@classmethod
def _extract_alerts(cls, data):
    """Yield (alert_type, message) for every alert in data['alerts'].

    Entries that are not dicts, alerts without a 'type' and alerts without
    any text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # The response is untrusted JSON: a non-dict renderer value
            # would otherwise raise AttributeError on .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
690
def _report_alerts(self, alerts, expected=True):
    """Report *alerts* (pairs of type and message) as warnings and/or an error.

    All non-error alerts and all but the last error alert are reported as
    warnings. If there is at least one alert of type 'error' (compared
    case-insensitively), ExtractorError is raised with the last one's message.
    """
    error_alerts, other_alerts = [], []
    for kind, text in alerts:
        bucket = error_alerts if kind.lower() == 'error' else other_alerts
        bucket.append([kind, text])

    for kind, text in other_alerts + error_alerts[:-1]:
        self.report_warning('YouTube said: %s - %s' % (kind, text))
    if error_alerts:
        final_message = error_alerts[-1][1]
        raise ExtractorError('YouTube said: %s' % final_message, expected=expected)
704
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of *data* and pass them, with any extra arguments, to _report_alerts."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
707
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels in renderer['badges']."""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}
715
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found in *data* at one of *path_list*.

    Each path is resolved with traverse_obj (no path means *data* itself).
    For every object found, its 'simpleText' is preferred; otherwise the
    'text' of its 'runs' (or of the object itself, if it is a list) is
    joined, using at most *max_runs* runs. Returns None when nothing matches.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only wrap the result when no key in the path is ... or a
            # list/tuple; otherwise it is iterated as returned
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            fragments = traverse_obj(
                runs[:limit], (..., 'text'), expected_type=str, default=[])
            joined = ''.join(fragments)
            if joined:
                return joined
47193e02 737
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the API endpoint *ep* with retries and return the JSON response.

    The request is repeated up to the 'extractor_retries' param (default 3)
    times on network errors other than HTTP 403/429, and when none of
    *check_get_keys* is present in the response. Alerts contained in the
    response are reported. When *fatal* is false, failures are reported as
    warnings and None is returned instead of raising.
    """
    max_retries = self.get_param('extractor_retries', 3)
    required_keys = [] if check_get_keys is None else check_get_keys
    response = None
    retry_reason = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        if retry_reason:
            self.report_warning('%s. Retrying ...' % retry_reason)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, retry_suffix))
        except ExtractorError as err:
            cause = err.cause
            if isinstance(cause, network_exceptions):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                not_retryable = isinstance(cause, compat_HTTPError) and cause.code in (403, 429)
                if not not_retryable:
                    retry_reason = error_to_compat_str(cause or err)
                    if attempt < max_retries:
                        continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(err))
            return
        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as err:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(err))
                return

            if not required_keys or dict_get(response, required_keys):
                break

            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            retry_reason = 'Incomplete data received'
            if attempt >= max_retries:
                if fatal:
                    raise ExtractorError(retry_reason)
                self.report_warning(retry_reason)
                return
    return response
796
@staticmethod
def is_music_url(url):
    """Return True if *url* starts with http(s)://music.youtube.com/."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))
800
def _extract_video(self, renderer):
    """Build a 'url' result for YoutubeIE from a video *renderer* dict.

    Title, description, duration, view count and uploader are read from the
    renderer's text fields; the video id doubles as the result url.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)

    # Only the leading digits/commas of the whitespace-stripped text are used
    views_text = self._get_text(renderer, 'viewCountText') or ''
    leading_number = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', views_text),
        'view count', default=None)
    view_count = str_to_int(leading_number)

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
825
0c148415 826
360e1ca5 827class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 828 IE_DESC = 'YouTube.com'
bc2ca1bb 829 _INVIDIOUS_SITES = (
830 # invidious-redirect websites
831 r'(?:www\.)?redirect\.invidious\.io',
832 r'(?:(?:www|dev)\.)?invidio\.us',
833 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
834 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 835 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 836 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 837 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 838 # youtube-dl invidious instances list
839 r'(?:(?:www|no)\.)?invidiou\.sh',
840 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
841 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 842 r'(?:www\.)?invidious\.mastodon\.host',
843 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 844 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 845 r'(?:www\.)?invidious\.tinfoil-hat\.net',
846 r'(?:www\.)?invidious\.himiko\.cloud',
847 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 848 r'(?:www\.)?invidious\.tube',
849 r'(?:www\.)?invidiou\.site',
850 r'(?:www\.)?invidious\.site',
851 r'(?:www\.)?invidious\.xyz',
852 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 853 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 854 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 855 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 856 r'(?:www\.)?tube\.poal\.co',
857 r'(?:www\.)?tube\.connect\.cafe',
858 r'(?:www\.)?vid\.wxzm\.sx',
859 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 860 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 861 r'(?:www\.)?yewtu\.be',
862 r'(?:www\.)?yt\.elukerio\.org',
863 r'(?:www\.)?yt\.lelux\.fi',
864 r'(?:www\.)?invidious\.ggc-project\.de',
865 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 866 r'(?:www\.)?ytprivate\.com',
867 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 868 r'(?:www\.)?invidious\.toot\.koeln',
869 r'(?:www\.)?invidious\.fdn\.fr',
870 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 871 r'(?:www\.)?invidious\.namazso\.eu',
872 r'(?:www\.)?invidious\.silkky\.cloud',
873 r'(?:www\.)?invidious\.exonip\.de',
874 r'(?:www\.)?invidious\.riverside\.rocks',
875 r'(?:www\.)?invidious\.blamefran\.net',
876 r'(?:www\.)?invidious\.moomoo\.de',
877 r'(?:www\.)?ytb\.trom\.tf',
878 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 879 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
880 r'(?:www\.)?qklhadlycap4cnod\.onion',
881 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
882 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
883 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
884 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
885 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
886 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 887 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
888 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
889 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
890 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 891 )
cb7dfeea 892 _VALID_URL = r"""(?x)^
c5e8d7af 893 (
edb53e2d 894 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 895 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
896 (?:www\.)?deturl\.com/www\.youtube\.com|
897 (?:www\.)?pwnyoutube\.com|
898 (?:www\.)?hooktube\.com|
899 (?:www\.)?yourepeat\.com|
900 tube\.majestyc\.net|
901 %(invidious)s|
902 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
903 (?:.*?\#/)? # handle anchor (#/) redirect urls
904 (?: # the various things that can precede the ID:
ac7553d0 905 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 906 |(?: # or the v= param in all its forms
f7000f3a 907 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 908 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 909 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
910 v=
911 )
f4b05232 912 ))
cbaed4bb
S
913 |(?:
914 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
915 vid\.plus| # or vid.plus/xxxx
916 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 917 %(invidious)s
cbaed4bb 918 )/
edb53e2d 919 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 920 )
c5e8d7af 921 )? # all until now is optional -> you can pass the naked ID
201c1459 922 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 923 (?(1).+)? # if we found the ID, everything can follow
9297939e 924 (?:\#|$)""" % {
bc2ca1bb 925 'invidious': '|'.join(_INVIDIOUS_SITES),
926 }
e40c758c 927 _PLAYER_INFO_RE = (
cc2db878 928 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
929 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 930 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 931 )
2c62dc26 932 _formats = {
c2d3cb4c 933 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
934 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
935 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
936 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
937 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
938 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
939 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
940 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 941 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 942 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
943 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
944 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
945 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
946 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
947 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 948 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 949 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
950 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 951
952
953 # 3D videos
c2d3cb4c 954 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
955 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
956 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
957 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 958 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
959 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
960 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 961
96fb5605 962 # Apple HTTP Live Streaming
11f12195 963 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 964 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
965 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
966 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
967 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
968 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 969 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
970 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
971
972 # DASH mp4 video
d23028a8
S
973 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
974 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
975 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
976 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
977 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 978 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
979 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
980 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
981 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
982 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
983 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
984 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 985
f6f1fc92 986 # Dash mp4 audio
d23028a8
S
987 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
988 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
989 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
990 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
991 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
992 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
993 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
994
995 # Dash webm
d23028a8
S
996 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
997 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
998 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
999 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1000 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1001 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1002 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1003 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1004 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1005 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1006 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1007 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1008 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1009 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1010 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1011 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1012 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1013 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1014 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1015 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1016 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1017 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1018
1019 # Dash webm audio
d23028a8
S
1020 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1021 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1022
0857baad 1023 # Dash webm audio with opus inside
d23028a8
S
1024 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1025 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1026 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1027
ce6b9a2d
PH
1028 # RTMP (unnamed)
1029 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1030
1031 # av01 video only formats sometimes served with "unknown" codecs
1032 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1033 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1034 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1035 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 1036 }
29f7c58a 1037 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1038
109dd3b2 1039 _AGE_GATE_REASONS = (
1040 'Sign in to confirm your age',
1041 'This video may be inappropriate for some users.',
9275f62c 1042 'Sorry, this content is age-restricted.',
1043 'Please confirm your age.')
1044
1045 _AGE_GATE_STATUS_REASONS = (
1046 'AGE_VERIFICATION_REQUIRED',
1047 'AGE_CHECK_REQUIRED'
1048 )
109dd3b2 1049
fd5c4aab
S
1050 _GEO_BYPASS = False
1051
78caa52a 1052 IE_NAME = 'youtube'
2eb88d95
PH
1053 _TESTS = [
1054 {
2d3d2997 1055 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1056 'info_dict': {
1057 'id': 'BaW_jenozKc',
1058 'ext': 'mp4',
3867038a 1059 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1060 'uploader': 'Philipp Hagemeister',
1061 'uploader_id': 'phihag',
ec85ded8 1062 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
1063 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1064 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1065 'upload_date': '20121002',
3867038a 1066 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 1067 'categories': ['Science & Technology'],
3867038a 1068 'tags': ['youtube-dl'],
556dbe7f 1069 'duration': 10,
dbdaaa23 1070 'view_count': int,
3e7c1224
PH
1071 'like_count': int,
1072 'dislike_count': int,
7c80519c 1073 'start_time': 1,
297a564b 1074 'end_time': 9,
2eb88d95 1075 }
0e853ca4 1076 },
fccd3771 1077 {
4bc3a23e
PH
1078 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1079 'note': 'Embed-only video (#1746)',
1080 'info_dict': {
1081 'id': 'yZIXLfi8CZQ',
1082 'ext': 'mp4',
1083 'upload_date': '20120608',
1084 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1085 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1086 'uploader': 'SET India',
94bfcd23 1087 'uploader_id': 'setindia',
ec85ded8 1088 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1089 'age_limit': 18,
545cc85d 1090 },
1091 'skip': 'Private video',
fccd3771 1092 },
11b56058 1093 {
8bdd16b4 1094 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1095 'note': 'Use the first video ID in the URL',
1096 'info_dict': {
1097 'id': 'BaW_jenozKc',
1098 'ext': 'mp4',
3867038a 1099 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1100 'uploader': 'Philipp Hagemeister',
1101 'uploader_id': 'phihag',
ec85ded8 1102 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1103 'upload_date': '20121002',
3867038a 1104 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1105 'categories': ['Science & Technology'],
3867038a 1106 'tags': ['youtube-dl'],
556dbe7f 1107 'duration': 10,
dbdaaa23 1108 'view_count': int,
11b56058
PM
1109 'like_count': int,
1110 'dislike_count': int,
34a7de29
S
1111 },
1112 'params': {
1113 'skip_download': True,
1114 },
11b56058 1115 },
dd27fd17 1116 {
2d3d2997 1117 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1118 'note': '256k DASH audio (format 141) via DASH manifest',
1119 'info_dict': {
1120 'id': 'a9LDPn-MO4I',
1121 'ext': 'm4a',
1122 'upload_date': '20121002',
1123 'uploader_id': '8KVIDEO',
ec85ded8 1124 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1125 'description': '',
1126 'uploader': '8KVIDEO',
1127 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1128 },
4bc3a23e
PH
1129 'params': {
1130 'youtube_include_dash_manifest': True,
1131 'format': '141',
4919603f 1132 },
de3c7fe0 1133 'skip': 'format 141 not served anymore',
dd27fd17 1134 },
8bdd16b4 1135 # DASH manifest with encrypted signature
1136 {
1137 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1138 'info_dict': {
1139 'id': 'IB3lcPjvWLA',
1140 'ext': 'm4a',
1141 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1142 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1143 'duration': 244,
1144 'uploader': 'AfrojackVEVO',
1145 'uploader_id': 'AfrojackVEVO',
1146 'upload_date': '20131011',
cc2db878 1147 'abr': 129.495,
8bdd16b4 1148 },
1149 'params': {
1150 'youtube_include_dash_manifest': True,
1151 'format': '141/bestaudio[ext=m4a]',
1152 },
1153 },
65c2fde2 1154 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1155 {
65c2fde2 1156 'note': 'Embed allowed age-gate video',
2d3d2997 1157 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1158 'info_dict': {
1159 'id': 'HtVdAasjOgU',
1160 'ext': 'mp4',
1161 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1162 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1163 'duration': 142,
c522adb1
JMF
1164 'uploader': 'The Witcher',
1165 'uploader_id': 'WitcherGame',
ec85ded8 1166 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1167 'upload_date': '20140605',
34952f09 1168 'age_limit': 18,
c522adb1
JMF
1169 },
1170 },
65c2fde2 1171 {
1172 'note': 'Age-gate video with embed allowed in public site',
1173 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1174 'info_dict': {
1175 'id': 'HsUATh_Nc2U',
1176 'ext': 'mp4',
1177 'title': 'Godzilla 2 (Official Video)',
1178 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1179 'upload_date': '20200408',
1180 'uploader_id': 'FlyingKitty900',
1181 'uploader': 'FlyingKitty',
1182 'age_limit': 18,
1183 },
1184 },
1185 {
1186 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1187 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1188 'info_dict': {
1189 'id': 'Tq92D6wQ1mg',
1190 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1191 'ext': 'mp4',
1192 'upload_date': '20191227',
65c2fde2 1193 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1194 'uploader': 'Projekt Melody',
1195 'description': 'md5:17eccca93a786d51bc67646756894066',
1196 'age_limit': 18,
1197 },
1198 },
1199 {
1200 'note': 'Non-Agegated non-embeddable video',
1201 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1202 'info_dict': {
1203 'id': 'MeJVWBSsPAY',
1204 'ext': 'mp4',
1205 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1206 'uploader': 'Herr Lurik',
1207 'uploader_id': 'st3in234',
1208 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1209 'upload_date': '20130730',
1210 },
1211 },
1212 {
1213 'note': 'Non-bypassable age-gated video',
1214 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1215 'only_matching': True,
1216 },
8bdd16b4 1217 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1218 # YouTube Red ad is not captured for creator
1219 {
1220 'url': '__2ABJjxzNo',
1221 'info_dict': {
1222 'id': '__2ABJjxzNo',
1223 'ext': 'mp4',
1224 'duration': 266,
1225 'upload_date': '20100430',
1226 'uploader_id': 'deadmau5',
1227 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1228 'creator': 'deadmau5',
1229 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1230 'uploader': 'deadmau5',
1231 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1232 'alt_title': 'Some Chords',
8bdd16b4 1233 },
1234 'expected_warnings': [
1235 'DASH manifest missing',
1236 ]
1237 },
067aa17e 1238 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1239 {
1240 'url': 'lqQg6PlCWgI',
1241 'info_dict': {
1242 'id': 'lqQg6PlCWgI',
1243 'ext': 'mp4',
556dbe7f 1244 'duration': 6085,
90227264 1245 'upload_date': '20150827',
cbe2bd91 1246 'uploader_id': 'olympic',
ec85ded8 1247 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1248 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1249 'uploader': 'Olympics',
cbe2bd91
PH
1250 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1251 },
1252 'params': {
1253 'skip_download': 'requires avconv',
e52a40ab 1254 }
cbe2bd91 1255 },
6271f1ca
PH
1256 # Non-square pixels
1257 {
1258 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1259 'info_dict': {
1260 'id': '_b-2C3KPAM0',
1261 'ext': 'mp4',
1262 'stretched_ratio': 16 / 9.,
556dbe7f 1263 'duration': 85,
6271f1ca
PH
1264 'upload_date': '20110310',
1265 'uploader_id': 'AllenMeow',
ec85ded8 1266 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1267 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1268 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1269 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1270 },
06b491eb
S
1271 },
1272 # url_encoded_fmt_stream_map is empty string
1273 {
1274 'url': 'qEJwOuvDf7I',
1275 'info_dict': {
1276 'id': 'qEJwOuvDf7I',
f57b7835 1277 'ext': 'webm',
06b491eb
S
1278 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1279 'description': '',
1280 'upload_date': '20150404',
1281 'uploader_id': 'spbelect',
1282 'uploader': 'Наблюдатели Петербурга',
1283 },
1284 'params': {
1285 'skip_download': 'requires avconv',
e323cf3f
S
1286 },
1287 'skip': 'This live event has ended.',
06b491eb 1288 },
067aa17e 1289 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1290 {
1291 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1292 'info_dict': {
1293 'id': 'FIl7x6_3R5Y',
eb6793ba 1294 'ext': 'webm',
da77d856
S
1295 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1296 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1297 'duration': 220,
da77d856
S
1298 'upload_date': '20150625',
1299 'uploader_id': 'dorappi2000',
ec85ded8 1300 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1301 'uploader': 'dorappi2000',
eb6793ba 1302 'formats': 'mincount:31',
da77d856 1303 },
eb6793ba 1304 'skip': 'not actual anymore',
2ee8f5d8 1305 },
8a1a26ce
YCH
1306 # DASH manifest with segment_list
1307 {
1308 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1309 'md5': '8ce563a1d667b599d21064e982ab9e31',
1310 'info_dict': {
1311 'id': 'CsmdDsKjzN8',
1312 'ext': 'mp4',
17ee98e1 1313 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1314 'uploader': 'Airtek',
1315 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1316 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1317 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1318 },
1319 'params': {
1320 'youtube_include_dash_manifest': True,
1321 'format': '135', # bestvideo
be49068d
S
1322 },
1323 'skip': 'This live event has ended.',
2ee8f5d8 1324 },
cf7e015f
S
1325 {
1326 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1327 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1328 'info_dict': {
545cc85d 1329 'id': 'jvGDaLqkpTg',
1330 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1331 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1332 },
1333 'playlist': [{
1334 'info_dict': {
545cc85d 1335 'id': 'jvGDaLqkpTg',
cf7e015f 1336 'ext': 'mp4',
545cc85d 1337 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1338 'description': 'md5:e03b909557865076822aa169218d6a5d',
1339 'duration': 10643,
1340 'upload_date': '20161111',
1341 'uploader': 'Team PGP',
1342 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1343 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1344 },
1345 }, {
1346 'info_dict': {
545cc85d 1347 'id': '3AKt1R1aDnw',
cf7e015f 1348 'ext': 'mp4',
545cc85d 1349 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1350 'description': 'md5:e03b909557865076822aa169218d6a5d',
1351 'duration': 10991,
1352 'upload_date': '20161111',
1353 'uploader': 'Team PGP',
1354 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1355 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1356 },
1357 }, {
1358 'info_dict': {
545cc85d 1359 'id': 'RtAMM00gpVc',
cf7e015f 1360 'ext': 'mp4',
545cc85d 1361 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1362 'description': 'md5:e03b909557865076822aa169218d6a5d',
1363 'duration': 10995,
1364 'upload_date': '20161111',
1365 'uploader': 'Team PGP',
1366 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1367 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1368 },
1369 }, {
1370 'info_dict': {
545cc85d 1371 'id': '6N2fdlP3C5U',
cf7e015f 1372 'ext': 'mp4',
545cc85d 1373 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1374 'description': 'md5:e03b909557865076822aa169218d6a5d',
1375 'duration': 10990,
1376 'upload_date': '20161111',
1377 'uploader': 'Team PGP',
1378 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1379 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1380 },
1381 }],
1382 'params': {
1383 'skip_download': True,
1384 },
65c2fde2 1385 'skip': 'Not multifeed anymore',
cbaed4bb 1386 },
f9f49d87 1387 {
067aa17e 1388 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1389 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1390 'info_dict': {
1391 'id': 'gVfLd0zydlo',
1392 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1393 },
1394 'playlist_count': 2,
be49068d 1395 'skip': 'Not multifeed anymore',
f9f49d87 1396 },
cbaed4bb 1397 {
2d3d2997 1398 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1399 'only_matching': True,
0e49d9a6 1400 },
6d4fc66b 1401 {
2d3d2997 1402 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1403 'only_matching': True,
1404 },
0e49d9a6 1405 {
067aa17e 1406 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1407 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1408 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1409 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1410 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1411 'info_dict': {
1412 'id': 'lsguqyKfVQg',
1413 'ext': 'mp4',
1414 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1415 'alt_title': 'Dark Walk',
0e49d9a6 1416 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1417 'duration': 133,
0e49d9a6
LL
1418 'upload_date': '20151119',
1419 'uploader_id': 'IronSoulElf',
ec85ded8 1420 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1421 'uploader': 'IronSoulElf',
11f9be09 1422 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1423 'track': 'Dark Walk',
1424 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1425 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1426 },
1427 'params': {
1428 'skip_download': True,
1429 },
1430 },
61f92af1 1431 {
067aa17e 1432 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1433 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1434 'only_matching': True,
1435 },
313dfc45
LL
1436 {
1437 # Video with yt:stretch=17:0
1438 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1439 'info_dict': {
1440 'id': 'Q39EVAstoRM',
1441 'ext': 'mp4',
1442 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1443 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1444 'upload_date': '20151107',
1445 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1446 'uploader': 'CH GAMER DROID',
1447 },
1448 'params': {
1449 'skip_download': True,
1450 },
be49068d 1451 'skip': 'This video does not exist.',
313dfc45 1452 },
201c1459 1453 {
1454 # Video with incomplete 'yt:stretch=16:'
1455 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1456 'only_matching': True,
1457 },
7caf9830
S
1458 {
1459 # Video licensed under Creative Commons
1460 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1461 'info_dict': {
1462 'id': 'M4gD1WSo5mA',
1463 'ext': 'mp4',
1464 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1465 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1466 'duration': 721,
7caf9830
S
1467 'upload_date': '20150127',
1468 'uploader_id': 'BerkmanCenter',
ec85ded8 1469 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1470 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1471 'license': 'Creative Commons Attribution license (reuse allowed)',
1472 },
1473 'params': {
1474 'skip_download': True,
1475 },
1476 },
fd050249
S
1477 {
1478 # Channel-like uploader_url
1479 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1480 'info_dict': {
1481 'id': 'eQcmzGIKrzg',
1482 'ext': 'mp4',
1483 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1484 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1485 'duration': 4060,
fd050249 1486 'upload_date': '20151119',
eb6793ba 1487 'uploader': 'Bernie Sanders',
fd050249 1488 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1489 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1490 'license': 'Creative Commons Attribution license (reuse allowed)',
1491 },
1492 'params': {
1493 'skip_download': True,
1494 },
1495 },
040ac686
S
1496 {
1497 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1498 'only_matching': True,
7f29cf54
S
1499 },
1500 {
067aa17e 1501 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1502 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1503 'only_matching': True,
6496ccb4
S
1504 },
1505 {
1506 # Rental video preview
1507 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1508 'info_dict': {
1509 'id': 'uGpuVWrhIzE',
1510 'ext': 'mp4',
1511 'title': 'Piku - Trailer',
1512 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1513 'upload_date': '20150811',
1514 'uploader': 'FlixMatrix',
1515 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1516 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1517 'license': 'Standard YouTube License',
1518 },
1519 'params': {
1520 'skip_download': True,
1521 },
eb6793ba 1522 'skip': 'This video is not available.',
022a5d66 1523 },
12afdc2a
S
1524 {
1525 # YouTube Red video with episode data
1526 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1527 'info_dict': {
1528 'id': 'iqKdEhx-dD4',
1529 'ext': 'mp4',
1530 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1531 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1532 'duration': 2085,
12afdc2a
S
1533 'upload_date': '20170118',
1534 'uploader': 'Vsauce',
1535 'uploader_id': 'Vsauce',
1536 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1537 'series': 'Mind Field',
1538 'season_number': 1,
1539 'episode_number': 1,
1540 },
1541 'params': {
1542 'skip_download': True,
1543 },
1544 'expected_warnings': [
1545 'Skipping DASH manifest',
1546 ],
1547 },
c7121fa7
S
1548 {
1549 # The following content has been identified by the YouTube community
1550 # as inappropriate or offensive to some audiences.
1551 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1552 'info_dict': {
1553 'id': '6SJNVb0GnPI',
1554 'ext': 'mp4',
1555 'title': 'Race Differences in Intelligence',
1556 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1557 'duration': 965,
1558 'upload_date': '20140124',
1559 'uploader': 'New Century Foundation',
1560 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1561 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1562 },
1563 'params': {
1564 'skip_download': True,
1565 },
545cc85d 1566 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1567 },
022a5d66
S
1568 {
1569 # itag 212
1570 'url': '1t24XAntNCY',
1571 'only_matching': True,
fd5c4aab
S
1572 },
1573 {
1574 # geo restricted to JP
1575 'url': 'sJL6WA-aGkQ',
1576 'only_matching': True,
1577 },
cd5a74a2
S
1578 {
1579 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1580 'only_matching': True,
1581 },
bc2ca1bb 1582 {
1583 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1584 'only_matching': True,
1585 },
1586 {
1587 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1588 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1589 'only_matching': True,
1590 },
825cd268
RA
1591 {
1592 # DRM protected
1593 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1594 'only_matching': True,
4fe54c12
S
1595 },
1596 {
1597 # Video with unsupported adaptive stream type formats
1598 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1599 'info_dict': {
1600 'id': 'Z4Vy8R84T1U',
1601 'ext': 'mp4',
1602 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1603 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1604 'duration': 433,
1605 'upload_date': '20130923',
1606 'uploader': 'Amelia Putri Harwita',
1607 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1608 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1609 'formats': 'maxcount:10',
1610 },
1611 'params': {
1612 'skip_download': True,
1613 'youtube_include_dash_manifest': False,
1614 },
5429d6a9 1615 'skip': 'not actual anymore',
5caabd3c 1616 },
1617 {
822b9d9c 1618 # Youtube Music Auto-generated description
5caabd3c 1619 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1620 'info_dict': {
1621 'id': 'MgNrAu2pzNs',
1622 'ext': 'mp4',
1623 'title': 'Voyeur Girl',
1624 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1625 'upload_date': '20190312',
5429d6a9
S
1626 'uploader': 'Stephen - Topic',
1627 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1628 'artist': 'Stephen',
1629 'track': 'Voyeur Girl',
1630 'album': 'it\'s too much love to know my dear',
1631 'release_date': '20190313',
1632 'release_year': 2019,
1633 },
1634 'params': {
1635 'skip_download': True,
1636 },
1637 },
66b48727
RA
1638 {
1639 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1640 'only_matching': True,
1641 },
011e75e6
S
1642 {
1643 # invalid -> valid video id redirection
1644 'url': 'DJztXj2GPfl',
1645 'info_dict': {
1646 'id': 'DJztXj2GPfk',
1647 'ext': 'mp4',
1648 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1649 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1650 'upload_date': '20090125',
1651 'uploader': 'Prochorowka',
1652 'uploader_id': 'Prochorowka',
1653 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1654 'artist': 'Panjabi MC',
1655 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1656 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1657 },
1658 'params': {
1659 'skip_download': True,
1660 },
545cc85d 1661 'skip': 'Video unavailable',
ea74e00b
DP
1662 },
1663 {
1664 # empty description results in an empty string
1665 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1666 'info_dict': {
1667 'id': 'x41yOUIvK2k',
1668 'ext': 'mp4',
1669 'title': 'IMG 3456',
1670 'description': '',
1671 'upload_date': '20170613',
1672 'uploader_id': 'ElevageOrVert',
1673 'uploader': 'ElevageOrVert',
1674 },
1675 'params': {
1676 'skip_download': True,
1677 },
1678 },
a0566bbf 1679 {
29f7c58a 1680 # with '};' inside yt initial data (see [1])
1681 # see [2] for an example with '};' inside ytInitialPlayerResponse
1682 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1683 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1684 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1685 'info_dict': {
1686 'id': 'CHqg6qOn4no',
1687 'ext': 'mp4',
1688 'title': 'Part 77 Sort a list of simple types in c#',
1689 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1690 'upload_date': '20130831',
1691 'uploader_id': 'kudvenkat',
1692 'uploader': 'kudvenkat',
1693 },
1694 'params': {
1695 'skip_download': True,
1696 },
1697 },
29f7c58a 1698 {
1699 # another example of '};' in ytInitialData
1700 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1701 'only_matching': True,
1702 },
1703 {
1704 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1705 'only_matching': True,
1706 },
545cc85d 1707 {
cc2db878 1708 # https://github.com/ytdl-org/youtube-dl/pull/28094
1709 'url': 'OtqTfy26tG0',
1710 'info_dict': {
1711 'id': 'OtqTfy26tG0',
1712 'ext': 'mp4',
1713 'title': 'Burn Out',
1714 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1715 'upload_date': '20141120',
1716 'uploader': 'The Cinematic Orchestra - Topic',
1717 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1718 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1719 'artist': 'The Cinematic Orchestra',
1720 'track': 'Burn Out',
1721 'album': 'Every Day',
1722 'release_data': None,
1723 'release_year': None,
1724 },
1725 'params': {
1726 'skip_download': True,
1727 },
545cc85d 1728 },
bc2ca1bb 1729 {
1730 # controversial video, only works with bpctr when authenticated with cookies
1731 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1732 'only_matching': True,
1733 },
a1a7907b 1734 {
1735 # controversial video, requires bpctr/contentCheckOk
1736 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1737 'info_dict': {
1738 'id': 'SZJvDhaSDnc',
1739 'ext': 'mp4',
1740 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1741 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1742 'uploader': 'CBS This Morning',
11f9be09 1743 'uploader_id': 'CBSThisMorning',
a1a7907b 1744 'upload_date': '20140716',
1745 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1746 }
1747 },
f7ad7160 1748 {
1749 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1750 'url': 'cBvYw8_A0vQ',
1751 'info_dict': {
1752 'id': 'cBvYw8_A0vQ',
1753 'ext': 'mp4',
1754 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1755 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1756 'upload_date': '20201120',
1757 'uploader': 'Walk around Japan',
1758 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1759 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1760 },
1761 'params': {
1762 'skip_download': True,
1763 },
0fb983f6 1764 }, {
1765 # Has multiple audio streams
1766 'url': 'WaOKSUlf4TM',
1767 'only_matching': True
9297939e 1768 }, {
1769 # Requires Premium: has format 141 when requested using YTM url
1770 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1771 'only_matching': True
1772 }, {
120916da 1773 # multiple subtitles with same lang_code
1774 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1775 'only_matching': True,
109dd3b2 1776 }, {
1777 # Force use android client fallback
1778 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1779 'info_dict': {
1780 'id': 'YOelRv7fMxY',
11f9be09 1781 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1782 'ext': '3gp',
1783 'upload_date': '20210624',
1784 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1785 'uploader': 'colinfurze',
11f9be09 1786 'uploader_id': 'colinfurze',
109dd3b2 1787 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1788 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1789 },
1790 'params': {
1791 'format': '17', # 3gp format available on android
1792 'extractor_args': {'youtube': {'player_client': ['android']}},
1793 },
120916da 1794 },
109dd3b2 1795 {
1796 # Skip download of additional client configs (remix client config in this case)
1797 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1798 'only_matching': True,
1799 'params': {
1800 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1801 },
1802 }
2eb88d95
PH
1803 ]
1804
@classmethod
def suitable(cls, url):
    """Return False for URLs whose query has a non-empty ``list`` value,
    otherwise defer to the parent class' ``suitable``."""
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1814
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the two per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, keyed by (player_url, signature cache id)
    self._player_cache = {}
    # Downloaded player code, keyed by player id
    self._code_cache = {}
e0df6211 1819
def _extract_player_url(self, ytcfg=None, webpage=None):
    """
    Return the absolute URL of the JS player, or None if it cannot be found.

    The URL is read from ytcfg['PLAYER_JS_URL'] first and, failing that,
    searched for in the webpage. Protocol-relative and site-relative URLs
    are made absolute.
    """
    player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if webpage and not player_url:
        player_url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not player_url:
        return None
    if player_url.startswith('//'):
        # protocol-relative URL
        return 'https:' + player_url
    if re.match(r'https?://', player_url):
        return player_url
    # anything else is resolved against the YouTube origin
    return compat_urlparse.urljoin('https://www.youtube.com', player_url)
1834
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature:
    the lengths of its dot-separated parts, joined by dots """
    part_lengths = map(len, example_sig.split('.'))
    return '.'.join(map(compat_str, part_lengths))
60064c53 1838
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the player id found in player_url.

    The patterns in cls._PLAYER_INFO_RE are tried in order and the 'id'
    group of the first match is returned.
    Raises ExtractorError when none of the patterns match.
    """
    candidates = (re.search(player_re, player_url) for player_re in cls._PLAYER_INFO_RE)
    id_m = next((m for m in candidates if m), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c 1848
109dd3b2 1849 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1850 player_id = self._extract_player_info(player_url)
1851 if player_id not in self._code_cache:
1852 self._code_cache[player_id] = self._download_webpage(
1853 player_url, video_id, fatal=fatal,
1854 note='Downloading player ' + player_id,
1855 errnote='Download of %s failed' % player_url)
1856 return player_id in self._code_cache
1857
def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a callable that rearranges a signature string for the given player.

    A previously stored index list is used when the filesystem cache has one
    for this player and signature shape; otherwise the function is parsed
    out of the player code and its index list is stored in the cache.
    Returns None if the player could not be loaded.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        return lambda s: ''.join(s[i] for i in stored_spec)

    if not self._load_player(video_id, player_url):
        return None

    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Run the function on a string of distinct characters so that the
    # result spells out which input index ends up at each output position
    probe = ''.join(compat_chr(idx) for idx in range(len(example_sig)))
    new_spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, new_spec)
    return sig_func
83799698 1880
def _print_sig_code(self, func, example_sig):
    """
    Print Python code equivalent to the signature function `func`.

    `func` is applied to a probe string as long as `example_sig`; the
    resulting index order is written out as a sum of index and slice
    expressions and shown with self.to_screen.
    """
    def gen_sig_code(idxs):
        def _genslice(first, last, stride):
            first_s = '' if first == 0 else str(first)
            last_s = (':%d' % (last + stride)) if last + stride >= 0 else ':'
            stride_s = '' if stride == 1 else (':%d' % stride)
            return 's[%s%s%s]' % (first_s, last_s, stride_s)

        stride = None
        # Quelch pyflakes warnings - run_start will be set when stride is set
        run_start = '(Never used)'
        for cur, prev in zip(idxs[1:], idxs[:-1]):
            delta = cur - prev
            if stride is not None:
                # inside a run: emit it as a slice once the step changes
                if delta != stride:
                    yield _genslice(run_start, prev, stride)
                    stride = None
            elif delta in (-1, 1):
                # start of a new ascending/descending run
                stride, run_start = delta, prev
            else:
                yield 's[%d]' % prev
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield _genslice(run_start, cur, stride)

    probe = ''.join(compat_chr(n) for n in range(len(example_sig)))
    index_order = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_order))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1919
e0df6211
PH
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player code and wrap it.

    The function name is found with the patterns below (tried in order);
    the returned callable takes the signature string and passes it to the
    interpreted JS function as its only argument.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        return initial_function([s])

    return run_signature_function
1943
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature

    The signature function is looked up in (or added to) self._player_cache
    under the key (player_url, signature cache id of s).
    Raises ExtractorError if player_url is None or if anything goes wrong
    while extracting or applying the function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        decipher = self._player_cache[cache_key]
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(decipher, s)
        return decipher(s)
    except Exception as err:
        # Any failure is reported as an ExtractorError carrying the traceback
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=err)
e0df6211 1965
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ytcfg['STS'] is used when present; otherwise the value is searched for
    in the player code. Without a player_url this raises ExtractorError if
    fatal is set, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    if not self._load_player(video_id, player_url, fatal=fatal):
        return sts

    code = self._code_cache[self._extract_player_info(player_url)]
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
1990
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback tracking URL so that the video is marked as watched.

    The first valid videostatsPlaybackUrl found in player_responses is used,
    with 'ver' and a random 'cpn' added to its query. A warning is reported
    (and nothing requested) when no such URL exists; the request itself is
    non-fatal.
    """
    playback_url = traverse_obj(
        player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # Pick each of the 16 characters uniformly. The previous
    # `randint(0, 256) & 63` included 256 and so slightly favoured index 0.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2016
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """
    Return a list of YouTube embed URLs / video ids found in webpage.

    Three kinds of embed are recognised: the regular embedded player
    (iframe, embed, object, SWFObject and similar), lazyYT and the
    Wordpress "YouTube Video Importer" plugin.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read `embedSWF\(?:\s*`, which
    # demands a literal ':' before the quote and so never matched a call
    # such as embedSWF("//www.youtube.com/v/...").
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(video_id)
        for video_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # each match is a tuple of groups; the video id is the last one
    entries.extend(m[-1] for m in matches)

    return entries
2048
@staticmethod
def _extract_url(webpage):
    """Return the first entry of YoutubeIE._extract_urls(webpage), or None if there is none."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)
2053
97665381
PH
@classmethod
def extract_id(cls, url):
    """
    Return the video id, i.e. group 2 of cls._VALID_URL matched against url.
    Raises ExtractorError if the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
2061
def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar found in data."""

    def start_seconds(chapter):
        # the start is given in milliseconds
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)
2076
def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Build the chapter list from the macro marker lists of the engagement
    panels in data. The first list that yields any chapters wins; an empty
    list is returned when none does.
    """
    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2092
2093 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2094 chapters = []
7c365c21 2095 last_chapter = {'start_time': 0}
2096 for idx, chapter in enumerate(chapter_list or []):
2097 title = chapter_title(chapter)
84213ea8
S
2098 start_time = chapter_time(chapter)
2099 if start_time is None:
2100 continue
7c365c21 2101 last_chapter['end_time'] = start_time
2102 if start_time < last_chapter['start_time']:
2103 if idx == 1:
2104 chapters.pop()
2105 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2106 else:
2107 self.report_warning(f'Invalid start time for chapter "{title}"')
2108 continue
2109 last_chapter = {'start_time': start_time, 'title': title}
2110 chapters.append(last_chapter)
2111 last_chapter['end_time'] = duration
84213ea8
S
2112 return chapters
2113
545cc85d 2114 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2115 return self._parse_json(self._search_regex(
2116 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2117 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 2118
d92f5d5a 2119 @staticmethod
2120 def parse_time_text(time_text):
2121 """
2122 Parse the comment time text
2123 time_text is in the format 'X units ago (edited)'
2124 """
2125 time_text_split = time_text.split(' ')
2126 if len(time_text_split) >= 3:
da503b7a 2127 try:
2128 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2129 except ValueError:
2130 return None
d92f5d5a 2131
a1c5d2ca
M
2132 def _extract_comment(self, comment_renderer, parent=None):
2133 comment_id = comment_renderer.get('commentId')
2134 if not comment_id:
2135 return
fe93e2c4 2136
052e1350 2137 text = self._get_text(comment_renderer, 'contentText')
fe93e2c4 2138
49bd8c66 2139 # note: timestamp is an estimate calculated from the current time and time_text
052e1350 2140 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
fe93e2c4 2141 time_text_dt = self.parse_time_text(time_text)
2142 if isinstance(time_text_dt, datetime.datetime):
2143 timestamp = calendar.timegm(time_text_dt.timetuple())
052e1350 2144 author = self._get_text(comment_renderer, 'authorText')
a1c5d2ca
M
2145 author_id = try_get(comment_renderer,
2146 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
fe93e2c4 2147
49bd8c66 2148 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2149 lambda x: x['likeCount']), compat_str)) or 0
a1c5d2ca
M
2150 author_thumbnail = try_get(comment_renderer,
2151 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2152
2153 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
97524332 2154 is_favorited = 'creatorHeart' in (try_get(
2155 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
a1c5d2ca
M
2156 return {
2157 'id': comment_id,
2158 'text': text,
d92f5d5a 2159 'timestamp': timestamp,
a1c5d2ca
M
2160 'time_text': time_text,
2161 'like_count': votes,
97524332 2162 'is_favorited': is_favorited,
a1c5d2ca
M
2163 'author': author,
2164 'author_id': author_id,
2165 'author_thumbnail': author_thumbnail,
2166 'author_is_uploader': author_is_uploader,
2167 'parent': parent or 'root'
2168 }
2169
2170 def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
2d6659b9 2171 ytcfg, video_id, parent=None, comment_counts=None):
2172
2173 def extract_header(contents):
2174 _total_comments = 0
2175 _continuation = None
2176 for content in contents:
2177 comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
fe93e2c4 2178 expected_comment_count = parse_count(self._get_text(
052e1350 2179 comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
fe93e2c4 2180
2d6659b9 2181 if expected_comment_count:
fe93e2c4 2182 comment_counts[1] = expected_comment_count
2183 self.to_screen('Downloading ~%d comments' % expected_comment_count)
2d6659b9 2184 _total_comments = comment_counts[1]
2185 sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
2186 comment_sort_index = int(sort_mode_str != 'top') # 1 = new, 0 = top
2187
2188 sort_menu_item = try_get(
2189 comments_header_renderer,
2190 lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2191 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2192
2193 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2194 if not _continuation:
2195 continue
2196
2197 sort_text = sort_menu_item.get('title')
2198 if isinstance(sort_text, compat_str):
2199 sort_text = sort_text.lower()
2200 else:
2201 sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2202 self.to_screen('Sorting comments by %s' % sort_text)
2203 break
2204 return _total_comments, _continuation
a1c5d2ca 2205
2d6659b9 2206 def extract_thread(contents):
a1c5d2ca
M
2207 if not parent:
2208 comment_counts[2] = 0
2209 for content in contents:
2210 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2211 comment_renderer = try_get(
2212 comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
2213 content, (lambda x: x['commentRenderer'], dict))
2214
2215 if not comment_renderer:
2216 continue
2217 comment = self._extract_comment(comment_renderer, parent)
2218 if not comment:
2219 continue
2220 comment_counts[0] += 1
2221 yield comment
2222 # Attempt to get the replies
2223 comment_replies_renderer = try_get(
2224 comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2225
2226 if comment_replies_renderer:
2227 comment_counts[2] += 1
2228 comment_entries_iter = self._comment_entries(
f4f751af 2229 comment_replies_renderer, identity_token, account_syncid, ytcfg,
2d6659b9 2230 video_id, parent=comment.get('id'), comment_counts=comment_counts)
a1c5d2ca
M
2231
2232 for reply_comment in comment_entries_iter:
2233 yield reply_comment
2234
2d6659b9 2235 # YouTube comments have a max depth of 2
2236 max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
2237 if max_depth == 1 and parent:
2238 return
a1c5d2ca
M
2239 if not comment_counts:
2240 # comment so far, est. total comments, current comment thread #
2241 comment_counts = [0, 0, 0]
a1c5d2ca 2242
2d6659b9 2243 continuation = self._extract_continuation(root_continuation_data)
fe93e2c4 2244 if continuation and len(continuation['continuation']) < 27:
2d6659b9 2245 self.write_debug('Detected old API continuation token. Generating new API compatible token.')
2246 continuation_token = self._generate_comment_continuation(video_id)
fe93e2c4 2247 continuation = self._build_api_continuation_query(continuation_token, None)
2d6659b9 2248
2249 visitor_data = None
2250 is_first_continuation = parent is None
a1c5d2ca
M
2251
2252 for page_num in itertools.count(0):
2253 if not continuation:
2254 break
11f9be09 2255 headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
2d6659b9 2256 comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
2257 if page_num == 0:
2258 if is_first_continuation:
2259 note_prefix = 'Downloading comment section API JSON'
a1c5d2ca 2260 else:
2d6659b9 2261 note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
2262 comment_counts[2], comment_prog_str)
2263 else:
2264 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2265 ' ' if parent else '', ' replies' if parent else '',
2266 page_num, comment_prog_str)
2267
2268 response = self._extract_response(
fe93e2c4 2269 item_id=None, query=continuation,
2d6659b9 2270 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2271 check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
a1c5d2ca
M
2272 if not response:
2273 break
f4f751af 2274 visitor_data = try_get(
2275 response,
2276 lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
2277 compat_str) or visitor_data
a1c5d2ca 2278
2d6659b9 2279 continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))
a1c5d2ca 2280
2d6659b9 2281 continuation = None
2282 if isinstance(continuation_contents, list):
2283 for continuation_section in continuation_contents:
2284 if not isinstance(continuation_section, dict):
2285 continue
2286 continuation_items = try_get(
2287 continuation_section,
2288 (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
2289 lambda x: x['appendContinuationItemsAction']['continuationItems']),
2290 list) or []
2291 if is_first_continuation:
2292 total_comments, continuation = extract_header(continuation_items)
2293 if total_comments:
2294 yield total_comments
2295 is_first_continuation = False
2296 if continuation:
2297 break
2298 continue
2299 count = 0
2300 for count, entry in enumerate(extract_thread(continuation_items)):
2301 yield entry
2302 continuation = self._extract_continuation({'contents': continuation_items})
2303 if continuation:
2304 # Sometimes YouTube provides a continuation without any comments
2305 # In most cases we end up just downloading these with very little comments to come.
2306 if count == 0:
2307 if not parent:
2308 self.report_warning('No comments received - assuming end of comments')
2309 continuation = None
a1c5d2ca
M
2310 break
2311
2d6659b9 2312 # Deprecated response structure
2313 elif isinstance(continuation_contents, dict):
2314 known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
2315 for key, continuation_renderer in continuation_contents.items():
2316 if key not in known_continuation_renderers:
2317 continue
2318 if not isinstance(continuation_renderer, dict):
2319 continue
2320 if is_first_continuation:
2321 header_continuation_items = [continuation_renderer.get('header') or {}]
2322 total_comments, continuation = extract_header(header_continuation_items)
2323 if total_comments:
2324 yield total_comments
2325 is_first_continuation = False
2326 if continuation:
2327 break
a1c5d2ca 2328
2d6659b9 2329 # Sometimes YouTube provides a continuation without any comments
2330 # In most cases we end up just downloading these with very few comments to come.
2331 count = 0
2332 for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
2333 yield entry
2334 continuation = self._extract_continuation(continuation_renderer)
2335 if count == 0:
2336 if not parent:
2337 self.report_warning('No comments received - assuming end of comments')
2338 continuation = None
2339 break
a1c5d2ca 2340
2d6659b9 2341 @staticmethod
2342 def _generate_comment_continuation(video_id):
2343 """
2344 Generates initial comment section continuation token from given video id
2345 """
2346 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2347 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2348 new_continuation_intlist = list(itertools.chain.from_iterable(
2349 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2350 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2351
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Walks the 'itemSectionRenderer' entries of `contents` and collects what
    self._comment_entries yields for them, stopping once the number given by
    the `max_comments` extractor argument has been collected.
    Integers yielded by self._comment_entries are taken as the estimated total
    number of comments and are only used for the final status message.
    A KeyboardInterrupt stops the collection; whatever was collected so far
    is still returned.

    Returns a dict with the keys 'comments' and 'comment_count'.
    """
    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    # Force English regardless of account setting to prevent parsing issues
    # See: https://github.com/yt-dlp/yt-dlp/issues/532
    ytcfg = copy.deepcopy(ytcfg)
    client_context = traverse_obj(
        ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})
    client_context['hl'] = 'en'

    section_renderer_keys = ('itemSectionRenderer',)

    def iter_section_comments(sections):
        if not isinstance(sections, list):
            return
        for section in sections:
            # Only the first known renderer of each section is processed
            for renderer_key, renderer in section.items():
                if renderer_key in section_renderer_keys:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    break

    collected = []
    estimated_total = 0
    try:
        for item in iter_section_comments(contents):
            if len(collected) >= comment_limit:
                break
            if isinstance(item, int):
                estimated_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(collected), estimated_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
2389
109dd3b2 2390 @staticmethod
2391 def _generate_player_context(sts=None):
2392 context = {
2393 'html5Preference': 'HTML5_PREF_WANTS',
2394 }
2395 if sts is not None:
2396 context['signatureTimestamp'] = sts
2397 return {
2398 'playbackContext': {
2399 'contentPlaybackContext': context
a1a7907b 2400 },
2fd226f6 2401 'contentCheckOk': True,
2402 'racyCheckOk': True
109dd3b2 2403 }
2404
def _is_agegated(self, player_response):
    """
    Check whether the given player response is age-gated

    Returns True if the "status" or "reason" of the playabilityStatus is one of
    the known age-gate reasons, or if the playabilityStatus carries a
    "desktopLegacyAgeGateReason"; False otherwise.
    """
    status_values = traverse_obj(
        player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    if any(
            value in self._AGE_GATE_REASONS + self._AGE_GATE_STATUS_REASONS
            for value in status_values):
        return True
    legacy_reason = traverse_obj(
        player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason'))
    return legacy_reason is not None
2413
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
    """
    Request the "player" API endpoint for the video on behalf of `client`

    The query consists of the video id and the player context generated from
    the signature timestamp. The request is made with fatal=False; any falsy
    result is returned as None.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    account_syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    signature_timestamp = self._extract_signature_timestamp(
        video_id, player_url, master_ytcfg, fatal=False)
    api_headers = self.generate_api_headers(
        player_ytcfg, identity_token, account_syncid,
        default_client=client, session_index=session_index)

    query = {'videoId': video_id, **self._generate_player_context(signature_timestamp)}
    response = self._extract_response(
        item_id=video_id, ep='player', query=query,
        ytcfg=player_ytcfg, headers=api_headers, fatal=False,
        default_client=client,
        note='Downloading %s player API JSON' % client.replace('_', ' ').strip())
    return response or None
2431
def _get_requested_clients(self, url, smuggled_data):
    """
    Determine which innertube clients should be queried for the video

    The clients are taken from the `player_client` extractor argument, where
    "all" stands for every allowed client; unsupported names are skipped with
    a warning. If nothing usable was requested, "android" and "web" are used.
    For music URLs, the "_music" variant of each requested client is added
    as well, provided that such a client exists.

    @param url            The URL being extracted
    @param smuggled_data  Smuggled data of the URL; the key "is_music_url" is read
    @returns              The result of orderedSet() on the requested client names
    """
    # Clients whose name starts with "_" are not selectable; highest priority first
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if not client.startswith('_')),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = ['android', 'web']

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # The variants are computed up-front so that the list being extended is
        # not iterated at the same time. Variants that do not exist (eg: for
        # clients that already are a "_music" client) must not be requested
        music_clients = [f'{client}_music' for client in requested_clients]
        requested_clients.extend(
            client for client in music_clients if client in INNERTUBE_CLIENTS)

    return orderedSet(requested_clients)
cf7e015f 2452
c0bc527b
M
2453 def _extract_player_ytcfg(self, client, video_id):
2454 url = {
2455 'web_music': 'https://music.youtube.com',
2456 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2457 }.get(client)
2458 if not url:
2459 return {}
2460 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2461 return self.extract_ytcfg(video_id, webpage) or {}
2462
11f9be09 2463 def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
2464 initial_pr = None
2465 if webpage:
2466 initial_pr = self._extract_yt_initial_variable(
2467 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2468 video_id, 'initial player response')
6b09401b 2469
c0bc527b
M
2470 original_clients = clients
2471 clients = clients[::-1]
2472 while clients:
2473 client = clients.pop()
11f9be09 2474 player_ytcfg = master_ytcfg if client == 'web' else {}
c0bc527b
M
2475 if 'configs' not in self._configuration_arg('player_skip'):
2476 player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
c0bc527b
M
2477
2478 pr = (
2479 initial_pr if client == 'web' and initial_pr
2480 else self._extract_player_response(
2481 client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr))
11f9be09 2482 if pr:
2483 yield pr
c0bc527b 2484
9275f62c 2485 if self._is_agegated(pr):
c0bc527b 2486 client = f'{client}_agegate'
000c15a4 2487 if client in INNERTUBE_CLIENTS and client not in original_clients:
c0bc527b
M
2488 clients.append(client)
2489
11f9be09 2490 # Android player_response does not have microFormats which are needed for
2491 # extraction of some data. So we return the initial_pr with formats
2492 # stripped out even if not requested by the user
2493 # See: https://github.com/yt-dlp/yt-dlp/issues/501
c0bc527b 2494 if initial_pr and 'web' not in original_clients:
11f9be09 2495 initial_pr['streamingData'] = None
2496 yield initial_pr
2497
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """
    Generate the format dicts for the given streaming data

    The formats listed directly in the streaming data ("formats" and
    "adaptiveFormats") are yielded first, followed by those of the HLS and
    DASH manifests whose itag has not been seen already.

    @param streaming_data  List of "streamingData" dicts
    @param video_id        The video id
    @param player_url      URL of the player; formats that need their signature
                           decrypted are skipped when it is not available
    @param is_live         Whether the video is live. DASH manifests are not
                           downloaded for live videos
    """
    # Only used for membership tests
    itags, stream_ids = set(), set()
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # "audioQuality" may be missing or null
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.add(itag)
            stream_ids.add(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': ', '.join(filter(None, (
                audio_track.get('displayName'),
                # quality is None when the format has neither "quality" nor "audioQuality"
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', '')))),
            'fps': int_or_none(fmt.get('fps')),
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    def guess_quality(f):
        # Manifest formats reuse the quality seen above for the same itag or,
        # failing that, for the same height
        for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
            if val in qdict:
                return q(qdict[val])
        return -1

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag in itags:
                    continue
                if itag:
                    f['format_id'] = itag
                    itags.add(itag)
                f['quality'] = guess_quality(f)
                yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag:
                    itags.add(itag)
                f['quality'] = guess_quality(f)
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                yield f
2632
2633 def _real_extract(self, url):
2634 url, smuggled_data = unsmuggle_url(url, {})
2635 video_id = self._match_id(url)
2636
2637 base_url = self.http_scheme() + '//www.youtube.com/'
2638 webpage_url = base_url + 'watch?v=' + video_id
2639 webpage = self._download_webpage(
2640 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2641
2642 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2643 player_url = self._extract_player_url(master_ytcfg, webpage)
2644 identity_token = self._extract_identity_token(webpage, video_id)
2645
2646 player_responses = list(self._extract_player_responses(
2647 self._get_requested_clients(url, smuggled_data),
2648 video_id, webpage, master_ytcfg, player_url, identity_token))
2649
352d63fd 2650 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
11f9be09 2651
2652 playability_statuses = traverse_obj(
2653 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2654
2655 trailer_video_id = get_first(
2656 playability_statuses,
2657 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2658 expected_type=str)
2659 if trailer_video_id:
2660 return self.url_result(
2661 trailer_video_id, self.ie_key(), trailer_video_id)
2662
2663 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2664 if webpage else (lambda x: None))
2665
2666 video_details = traverse_obj(
2667 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2668 microformats = traverse_obj(
2669 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2670 expected_type=dict, default=[])
2671 video_title = (
2672 get_first(video_details, 'title')
2673 or self._get_text(microformats, (..., 'title'))
2674 or search_meta(['og:title', 'twitter:title', 'title']))
2675 video_description = get_first(video_details, 'shortDescription')
2676
2677 if not smuggled_data.get('force_singlefeed', False):
2678 if not self.get_param('noplaylist'):
2679 multifeed_metadata_list = get_first(
2680 player_responses,
2681 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2682 expected_type=str)
2683 if multifeed_metadata_list:
2684 entries = []
2685 feed_ids = []
2686 for feed in multifeed_metadata_list.split(','):
2687 # Unquote should take place before split on comma (,) since textual
2688 # fields may contain comma as well (see
2689 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2690 feed_data = compat_parse_qs(
2691 compat_urllib_parse_unquote_plus(feed))
2692
2693 def feed_entry(name):
2694 return try_get(
2695 feed_data, lambda x: x[name][0], compat_str)
2696
2697 feed_id = feed_entry('id')
2698 if not feed_id:
2699 continue
2700 feed_title = feed_entry('title')
2701 title = video_title
2702 if feed_title:
2703 title += ' (%s)' % feed_title
2704 entries.append({
2705 '_type': 'url_transparent',
2706 'ie_key': 'Youtube',
2707 'url': smuggle_url(
2708 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2709 {'force_singlefeed': True}),
2710 'title': title,
2711 })
2712 feed_ids.append(feed_id)
2713 self.to_screen(
2714 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2715 % (', '.join(feed_ids), video_id))
2716 return self.playlist_result(
2717 entries, video_id, video_title, video_description)
2718 else:
2719 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2720
7ea65411 2721 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
11f9be09 2722 is_live = get_first(video_details, 'isLive')
7ea65411 2723 if is_live is None:
2724 is_live = get_first(live_broadcast_details, 'isLiveNow')
11f9be09 2725
2726 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2727 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
bf1317d2 2728
545cc85d 2729 if not formats:
11f9be09 2730 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
b7da73eb 2731 self.raise_no_formats(
545cc85d 2732 'This video is DRM protected.', expected=True)
11f9be09 2733 pemr = get_first(
2734 playability_statuses,
2735 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2736 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2737 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 2738 if subreason:
545cc85d 2739 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 2740 countries = get_first(microformats, 'availableCountries')
545cc85d 2741 if not countries:
2742 regions_allowed = search_meta('regionsAllowed')
2743 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2744 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 2745 reason += f'. {subreason}'
545cc85d 2746 if reason:
b7da73eb 2747 self.raise_no_formats(reason, expected=True)
bf1317d2 2748
11f9be09 2749 for f in formats:
2a9c6dcd 2750 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
11f9be09 2751 f['source_preference'] = -10
3619f78d 2752 # TODO: this method is not reliable
2753 f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
11f9be09 2754
2a9c6dcd 2755 # Source is given priority since formats that throttle are given lower source_preference
2756 # When throttling issue is fully fixed, remove this
2757 self._sort_formats(formats, ('quality', 'height', 'fps', 'source'))
bf1317d2 2758
11f9be09 2759 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 2760 if not keywords and webpage:
2761 keywords = [
2762 unescapeHTML(m.group('content'))
2763 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2764 for keyword in keywords:
2765 if keyword.startswith('yt:stretch='):
201c1459 2766 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2767 if mobj:
2768 # NB: float is intentional for forcing float division
2769 w, h = (float(v) for v in mobj.groups())
2770 if w > 0 and h > 0:
2771 ratio = w / h
2772 for f in formats:
2773 if f.get('vcodec') != 'none':
2774 f['stretched_ratio'] = ratio
2775 break
6449cd80 2776
545cc85d 2777 thumbnails = []
11f9be09 2778 thumbnail_dicts = traverse_obj(
2779 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2780 expected_type=dict, default=[])
2781 for thumbnail in thumbnail_dicts:
2782 thumbnail_url = thumbnail.get('url')
2783 if not thumbnail_url:
2784 continue
2785 # Sometimes youtube gives a wrong thumbnail URL. See:
2786 # https://github.com/yt-dlp/yt-dlp/issues/233
2787 # https://github.com/ytdl-org/youtube-dl/issues/28023
2788 if 'maxresdefault' in thumbnail_url:
2789 thumbnail_url = thumbnail_url.split('?')[0]
2790 thumbnails.append({
2791 'url': thumbnail_url,
2792 'height': int_or_none(thumbnail.get('height')),
2793 'width': int_or_none(thumbnail.get('width')),
2794 })
ff2751ac 2795 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2796 if thumbnail_url:
2797 thumbnails.append({
2798 'url': thumbnail_url,
ff2751ac 2799 })
0ba692ac 2800 # The best resolution thumbnails sometimes do not appear in the webpage
2801 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 2802 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2803 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
245524e6 2804 # TODO: Test them also? - For some videos, even these don't exist
cca80fe6 2805 guaranteed_thumbnail_names = [
2806 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2807 'mqdefault', 'mq1', 'mq2', 'mq3',
2808 'default', '1', '2', '3'
2809 ]
2810 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2811 n_thumbnail_names = len(thumbnail_names)
2812
0ba692ac 2813 thumbnails.extend({
2814 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2815 video_id=video_id, name=name, ext=ext,
2816 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 2817 '_test_url': name in hq_thumbnail_names,
2818 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 2819 for thumb in thumbnails:
cca80fe6 2820 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 2821 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 2822 self._remove_duplicate_formats(thumbnails)
545cc85d 2823
7ea65411 2824 category = get_first(microformats, 'category') or search_meta('genre')
2825 channel_id = str_or_none(
2826 get_first(video_details, 'channelId')
2827 or get_first(microformats, 'externalChannelId')
2828 or search_meta('channelId'))
2829 duration = int_or_none(
2830 get_first(video_details, 'lengthSeconds')
2831 or get_first(microformats, 'lengthSeconds')
2832 or parse_duration(search_meta('duration'))) or None
2833 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2834
2835 live_content = get_first(video_details, 'isLiveContent')
2836 is_upcoming = get_first(video_details, 'isUpcoming')
2837 if is_live is None:
2838 if is_upcoming or live_content is False:
2839 is_live = False
2840 if is_upcoming is None and (live_content or is_live):
2841 is_upcoming = False
2842 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2843 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2844 if not duration and live_endtime and live_starttime:
2845 duration = live_endtime - live_starttime
2846
545cc85d 2847 info = {
2848 'id': video_id,
2849 'title': self._live_title(video_title) if is_live else video_title,
2850 'formats': formats,
2851 'thumbnails': thumbnails,
2852 'description': video_description,
2853 'upload_date': unified_strdate(
11f9be09 2854 get_first(microformats, 'uploadDate')
545cc85d 2855 or search_meta('uploadDate')),
11f9be09 2856 'uploader': get_first(video_details, 'author'),
545cc85d 2857 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2858 'uploader_url': owner_profile_url,
2859 'channel_id': channel_id,
11f9be09 2860 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
545cc85d 2861 'duration': duration,
2862 'view_count': int_or_none(
11f9be09 2863 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 2864 or search_meta('interactionCount')),
11f9be09 2865 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 2866 'age_limit': 18 if (
11f9be09 2867 get_first(microformats, 'isFamilySafe') is False
545cc85d 2868 or search_meta('isFamilyFriendly') == 'false'
2869 or search_meta('og:restrictions:age') == '18+') else 0,
2870 'webpage_url': webpage_url,
2871 'categories': [category] if category else None,
2872 'tags': keywords,
11f9be09 2873 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 2874 'is_live': is_live,
2875 'was_live': (False if is_live or is_upcoming or live_content is False
2876 else None if is_live is None or is_upcoming is None
2877 else live_content),
2878 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2879 'release_timestamp': live_starttime,
545cc85d 2880 }
b477fc13 2881
3944e7af 2882 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2883 # Converted into dicts to remove duplicates
2884 captions = {
2885 sub.get('baseUrl'): sub
2886 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2887 translation_languages = {
2888 lang.get('languageCode'): lang.get('languageName')
2889 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
545cc85d 2890 subtitles = {}
2891 if pctr:
774d79cc 2892 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2893 lang_subs = container.setdefault(lang_code, [])
545cc85d 2894 for fmt in self._SUBTITLE_FORMATS:
2895 query.update({
2896 'fmt': fmt,
2897 })
2898 lang_subs.append({
2899 'ext': fmt,
2900 'url': update_url_query(base_url, query),
774d79cc 2901 'name': sub_name,
545cc85d 2902 })
7e72694b 2903
3944e7af 2904 for base_url, caption_track in captions.items():
545cc85d 2905 if not base_url:
2906 continue
2907 if caption_track.get('kind') != 'asr':
120916da 2908 lang_code = (
2909 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2910 or caption_track.get('languageCode'))
545cc85d 2911 if not lang_code:
2912 continue
2913 process_language(
774d79cc 2914 subtitles, base_url, lang_code,
3944e7af 2915 traverse_obj(caption_track, ('name', 'simpleText')),
774d79cc 2916 {})
545cc85d 2917 continue
2918 automatic_captions = {}
3944e7af 2919 for trans_code, trans_name in translation_languages.items():
2920 if not trans_code:
545cc85d 2921 continue
2922 process_language(
3944e7af 2923 automatic_captions, base_url, trans_code,
2924 self._get_text(trans_name, max_runs=1),
2925 {'tlang': trans_code})
545cc85d 2926 info['automatic_captions'] = automatic_captions
2927 info['subtitles'] = subtitles
7e72694b 2928
545cc85d 2929 parsed_url = compat_urllib_parse_urlparse(url)
2930 for component in [parsed_url.fragment, parsed_url.query]:
2931 query = compat_parse_qs(component)
2932 for k, v in query.items():
2933 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2934 d_k += '_time'
2935 if d_k not in info and k in s_ks:
2936 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2937
2938 # Youtube Music Auto-generated description
822b9d9c 2939 if video_description:
38d70284 2940 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2941 if mobj:
822b9d9c
RA
2942 release_year = mobj.group('release_year')
2943 release_date = mobj.group('release_date')
2944 if release_date:
2945 release_date = release_date.replace('-', '')
2946 if not release_year:
545cc85d 2947 release_year = release_date[:4]
2948 info.update({
2949 'album': mobj.group('album'.strip()),
2950 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2951 'track': mobj.group('track').strip(),
2952 'release_date': release_date,
cc2db878 2953 'release_year': int_or_none(release_year),
545cc85d 2954 })
7e72694b 2955
545cc85d 2956 initial_data = None
2957 if webpage:
2958 initial_data = self._extract_yt_initial_variable(
2959 webpage, self._YT_INITIAL_DATA_RE, video_id,
2960 'yt initial data')
2961 if not initial_data:
11f9be09 2962 headers = self.generate_api_headers(
2963 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
2964 session_index=self._extract_session_index(master_ytcfg))
2965
109dd3b2 2966 initial_data = self._extract_response(
2967 item_id=video_id, ep='next', fatal=False,
11f9be09 2968 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
109dd3b2 2969 note='Downloading initial data API JSON')
545cc85d 2970
c60ee3a2 2971 try:
2972 # This will error if there is no livechat
2973 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2974 info['subtitles']['live_chat'] = [{
2975 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2976 'video_id': video_id,
2977 'ext': 'json',
f6745c49 2978 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 2979 }]
2980 except (KeyError, IndexError, TypeError):
2981 pass
545cc85d 2982
2983 if initial_data:
7c365c21 2984 info['chapters'] = (
2985 self._extract_chapters_from_json(initial_data, duration)
2986 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2987 or None)
545cc85d 2988
2989 contents = try_get(
2990 initial_data,
2991 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2992 list) or []
2993 for content in contents:
2994 vpir = content.get('videoPrimaryInfoRenderer')
2995 if vpir:
2996 stl = vpir.get('superTitleLink')
2997 if stl:
fe93e2c4 2998 stl = self._get_text(stl)
545cc85d 2999 if try_get(
3000 vpir,
3001 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3002 info['location'] = stl
3003 else:
3004 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3005 if mobj:
3006 info.update({
3007 'series': mobj.group(1),
3008 'season_number': int(mobj.group(2)),
3009 'episode_number': int(mobj.group(3)),
3010 })
3011 for tlb in (try_get(
3012 vpir,
3013 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3014 list) or []):
3015 tbr = tlb.get('toggleButtonRenderer') or {}
3016 for getter, regex in [(
3017 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3018 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3019 lambda x: x['accessibility'],
3020 lambda x: x['accessibilityData']['accessibilityData'],
3021 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3022 label = (try_get(tbr, getter, dict) or {}).get('label')
3023 if label:
3024 mobj = re.match(regex, label)
3025 if mobj:
3026 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3027 break
3028 sbr_tooltip = try_get(
3029 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3030 if sbr_tooltip:
3031 like_count, dislike_count = sbr_tooltip.split(' / ')
3032 info.update({
3033 'like_count': str_to_int(like_count),
3034 'dislike_count': str_to_int(dislike_count),
3035 })
3036 vsir = content.get('videoSecondaryInfoRenderer')
3037 if vsir:
052e1350 3038 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
545cc85d 3039 rows = try_get(
3040 vsir,
3041 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3042 list) or []
3043 multiple_songs = False
3044 for row in rows:
3045 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3046 multiple_songs = True
3047 break
3048 for row in rows:
3049 mrr = row.get('metadataRowRenderer') or {}
3050 mrr_title = mrr.get('title')
3051 if not mrr_title:
3052 continue
052e1350 3053 mrr_title = self._get_text(mrr, 'title')
3054 mrr_contents_text = self._get_text(mrr, ('contents', 0))
545cc85d 3055 if mrr_title == 'License':
3056 info['license'] = mrr_contents_text
3057 elif not multiple_songs:
3058 if mrr_title == 'Album':
3059 info['album'] = mrr_contents_text
3060 elif mrr_title == 'Artist':
3061 info['artist'] = mrr_contents_text
3062 elif mrr_title == 'Song':
3063 info['track'] = mrr_contents_text
3064
3065 fallbacks = {
3066 'channel': 'uploader',
3067 'channel_id': 'uploader_id',
3068 'channel_url': 'uploader_url',
3069 }
3070 for to, frm in fallbacks.items():
3071 if not info.get(to):
3072 info[to] = info.get(frm)
3073
3074 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3075 v = info.get(s_k)
3076 if v:
3077 info[d_k] = v
b84071c0 3078
11f9be09 3079 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3080 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3081 is_membersonly = None
b28f8d24 3082 is_premium = None
c224251a
M
3083 if initial_data and is_private is not None:
3084 is_membersonly = False
b28f8d24 3085 is_premium = False
47193e02 3086 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3087 badge_labels = set()
3088 for content in contents:
3089 if not isinstance(content, dict):
3090 continue
3091 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3092 for badge_label in badge_labels:
3093 if badge_label.lower() == 'members only':
3094 is_membersonly = True
3095 elif badge_label.lower() == 'premium':
3096 is_premium = True
3097 elif badge_label.lower() == 'unlisted':
3098 is_unlisted = True
c224251a 3099
c224251a
M
3100 info['availability'] = self._availability(
3101 is_private=is_private,
b28f8d24 3102 needs_premium=is_premium,
c224251a
M
3103 needs_subscription=is_membersonly,
3104 needs_auth=info['age_limit'] >= 18,
3105 is_unlisted=None if is_private is None else is_unlisted)
3106
06167fbb 3107 # get xsrf for annotations or comments
a06916d9 3108 get_annotations = self.get_param('writeannotations', False)
3109 get_comments = self.get_param('getcomments', False)
06167fbb 3110 if get_annotations or get_comments:
29f7c58a 3111 xsrf_token = None
11f9be09 3112 if master_ytcfg:
3113 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
29f7c58a 3114 if not xsrf_token:
3115 xsrf_token = self._search_regex(
3116 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 3117 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 3118
3119 # annotations
06167fbb 3120 if get_annotations:
11f9be09 3121 invideo_url = get_first(
3122 player_responses,
3123 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3124 expected_type=str)
64b6a4e9 3125 if xsrf_token and invideo_url:
29f7c58a 3126 xsrf_field_name = None
11f9be09 3127 if master_ytcfg:
3128 xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
29f7c58a 3129 if not xsrf_field_name:
3130 xsrf_field_name = self._search_regex(
3131 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 3132 webpage, 'xsrf field name',
29f7c58a 3133 group='xsrf_field_name', default='session_token')
8a784c74 3134 info['annotations'] = self._download_webpage(
64b6a4e9
RA
3135 self._proto_relative_url(invideo_url),
3136 video_id, note='Downloading annotations',
3137 errnote='Unable to download video annotations', fatal=False,
3138 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 3139
277d6ff5 3140 if get_comments:
11f9be09 3141 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3142
11f9be09 3143 self.mark_watched(video_id, player_responses)
d77ab8e2 3144
545cc85d 3145 return info
c5e8d7af 3146
5f6a1245 3147
8bdd16b4 3148class YoutubeTabIE(YoutubeBaseInfoExtractor):
3149 IE_DESC = 'YouTube.com tab'
70d5c17b 3150 _VALID_URL = r'''(?x)
3151 https?://
3152 (?:\w+\.)?
3153 (?:
3154 youtube(?:kids)?\.com|
3155 invidio\.us
3156 )/
3157 (?:
fe03a6cd 3158 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3159 (?P<not_channel>
9ba5705a 3160 feed/|hashtag/|
70d5c17b 3161 (?:playlist|watch)\?.*?\blist=
3162 )|
29f7c58a 3163 (?!(?:%s)\b) # Direct URLs
70d5c17b 3164 )
3165 (?P<id>[^/?\#&]+)
3166 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3167 IE_NAME = 'youtube:tab'
3168
81127aa5 3169 _TESTS = [{
da692b79 3170 'note': 'playlists, multipage',
8bdd16b4 3171 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3172 'playlist_mincount': 94,
3173 'info_dict': {
3174 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3175 'title': 'Игорь Клейнер - Playlists',
3176 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3177 'uploader': 'Игорь Клейнер',
3178 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3179 },
3180 }, {
da692b79 3181 'note': 'playlists, multipage, different order',
8bdd16b4 3182 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3183 'playlist_mincount': 94,
3184 'info_dict': {
3185 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3186 'title': 'Игорь Клейнер - Playlists',
3187 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3188 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3189 'uploader': 'Игорь Клейнер',
8bdd16b4 3190 },
201c1459 3191 }, {
da692b79 3192 'note': 'playlists, series',
201c1459 3193 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3194 'playlist_mincount': 5,
3195 'info_dict': {
3196 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3197 'title': '3Blue1Brown - Playlists',
3198 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3199 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3200 'uploader': '3Blue1Brown',
201c1459 3201 },
8bdd16b4 3202 }, {
da692b79 3203 'note': 'playlists, singlepage',
8bdd16b4 3204 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3205 'playlist_mincount': 4,
3206 'info_dict': {
3207 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3208 'title': 'ThirstForScience - Playlists',
3209 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3210 'uploader': 'ThirstForScience',
3211 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3212 }
3213 }, {
3214 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3215 'only_matching': True,
3216 }, {
da692b79 3217 'note': 'basic, single video playlist',
0e30a7b9 3218 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3219 'info_dict': {
0e30a7b9 3220 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3221 'uploader': 'Sergey M.',
3222 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3223 'title': 'youtube-dl public playlist',
81127aa5 3224 },
0e30a7b9 3225 'playlist_count': 1,
9291475f 3226 }, {
da692b79 3227 'note': 'empty playlist',
0e30a7b9 3228 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3229 'info_dict': {
0e30a7b9 3230 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3231 'uploader': 'Sergey M.',
3232 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3233 'title': 'youtube-dl empty playlist',
9291475f
PH
3234 },
3235 'playlist_count': 0,
3236 }, {
da692b79 3237 'note': 'Home tab',
8bdd16b4 3238 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3239 'info_dict': {
8bdd16b4 3240 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3241 'title': 'lex will - Home',
3242 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3243 'uploader': 'lex will',
3244 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3245 },
8bdd16b4 3246 'playlist_mincount': 2,
9291475f 3247 }, {
da692b79 3248 'note': 'Videos tab',
8bdd16b4 3249 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3250 'info_dict': {
8bdd16b4 3251 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3252 'title': 'lex will - Videos',
3253 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3254 'uploader': 'lex will',
3255 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3256 },
8bdd16b4 3257 'playlist_mincount': 975,
9291475f 3258 }, {
da692b79 3259 'note': 'Videos tab, sorted by popular',
8bdd16b4 3260 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3261 'info_dict': {
8bdd16b4 3262 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3263 'title': 'lex will - Videos',
3264 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3265 'uploader': 'lex will',
3266 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3267 },
8bdd16b4 3268 'playlist_mincount': 199,
9291475f 3269 }, {
da692b79 3270 'note': 'Playlists tab',
8bdd16b4 3271 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3272 'info_dict': {
8bdd16b4 3273 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3274 'title': 'lex will - Playlists',
3275 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3276 'uploader': 'lex will',
3277 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3278 },
8bdd16b4 3279 'playlist_mincount': 17,
ac7553d0 3280 }, {
da692b79 3281 'note': 'Community tab',
8bdd16b4 3282 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3283 'info_dict': {
8bdd16b4 3284 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3285 'title': 'lex will - Community',
3286 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3287 'uploader': 'lex will',
3288 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3289 },
3290 'playlist_mincount': 18,
87dadd45 3291 }, {
da692b79 3292 'note': 'Channels tab',
8bdd16b4 3293 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3294 'info_dict': {
8bdd16b4 3295 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3296 'title': 'lex will - Channels',
3297 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3298 'uploader': 'lex will',
3299 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3300 },
deaec5af 3301 'playlist_mincount': 12,
cd684175 3302 }, {
3303 'note': 'Search tab',
3304 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3305 'playlist_mincount': 40,
3306 'info_dict': {
3307 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3308 'title': '3Blue1Brown - Search - linear algebra',
3309 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3310 'uploader': '3Blue1Brown',
3311 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3312 },
6b08cdf6 3313 }, {
a0566bbf 3314 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3315 'only_matching': True,
3316 }, {
a0566bbf 3317 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3318 'only_matching': True,
3319 }, {
a0566bbf 3320 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3321 'only_matching': True,
3322 }, {
3323 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3324 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3325 'info_dict': {
3326 'title': '29C3: Not my department',
3327 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3328 'uploader': 'Christiaan008',
3329 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3330 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3331 },
3332 'playlist_count': 96,
3333 }, {
3334 'note': 'Large playlist',
3335 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3336 'info_dict': {
8bdd16b4 3337 'title': 'Uploads from Cauchemar',
3338 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3339 'uploader': 'Cauchemar',
3340 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3341 },
8bdd16b4 3342 'playlist_mincount': 1123,
3343 }, {
da692b79 3344 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3345 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3346 'only_matching': True,
4b7df0d3
JMF
3347 }, {
3348 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3349 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3350 'info_dict': {
acf757f4
PH
3351 'title': 'Uploads from Interstellar Movie',
3352 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3353 'uploader': 'Interstellar Movie',
8bdd16b4 3354 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3355 },
481cc733 3356 'playlist_mincount': 21,
358de58c 3357 }, {
3358 'note': 'Playlist with "show unavailable videos" button',
3359 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3360 'info_dict': {
3361 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3362 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3363 'uploader': 'Phim Siêu Nhân Nhật Bản',
3364 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3365 },
da692b79 3366 'playlist_mincount': 200,
5d342002 3367 }, {
da692b79 3368 'note': 'Playlist with unavailable videos in page 7',
5d342002 3369 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3370 'info_dict': {
3371 'title': 'Uploads from BlankTV',
3372 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3373 'uploader': 'BlankTV',
3374 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3375 },
da692b79 3376 'playlist_mincount': 1000,
8bdd16b4 3377 }, {
da692b79 3378 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3379 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3380 'info_dict': {
3381 'title': 'Data Analysis with Dr Mike Pound',
3382 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3383 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3384 'uploader': 'Computerphile',
deaec5af 3385 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3386 },
3387 'playlist_mincount': 11,
3388 }, {
a0566bbf 3389 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3390 'only_matching': True,
dacb3a86 3391 }, {
da692b79 3392 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3393 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3394 'info_dict': {
3395 'id': 'FqZTN594JQw',
3396 'ext': 'webm',
3397 'title': "Smiley's People 01 detective, Adventure Series, Action",
3398 'uploader': 'STREEM',
3399 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3400 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3401 'upload_date': '20150526',
3402 'license': 'Standard YouTube License',
3403 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3404 'categories': ['People & Blogs'],
3405 'tags': list,
dbdaaa23 3406 'view_count': int,
dacb3a86
S
3407 'like_count': int,
3408 'dislike_count': int,
3409 },
3410 'params': {
3411 'skip_download': True,
3412 },
13a75688 3413 'skip': 'This video is not available.',
dacb3a86 3414 'add_ie': [YoutubeIE.ie_key()],
481cc733 3415 }, {
8bdd16b4 3416 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3417 'only_matching': True,
66b48727 3418 }, {
8bdd16b4 3419 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3420 'only_matching': True,
a0566bbf 3421 }, {
3422 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3423 'info_dict': {
11f9be09 3424 'id': 'FMtPN8yp5LU', # This will keep changing
a0566bbf 3425 'ext': 'mp4',
deaec5af 3426 'title': compat_str,
a0566bbf 3427 'uploader': 'Sky News',
3428 'uploader_id': 'skynews',
3429 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3430 'upload_date': r're:\d{8}',
3431 'description': compat_str,
a0566bbf 3432 'categories': ['News & Politics'],
3433 'tags': list,
3434 'like_count': int,
3435 'dislike_count': int,
3436 },
3437 'params': {
3438 'skip_download': True,
3439 },
da692b79 3440 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3441 }, {
3442 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3443 'info_dict': {
3444 'id': 'a48o2S1cPoo',
3445 'ext': 'mp4',
3446 'title': 'The Young Turks - Live Main Show',
3447 'uploader': 'The Young Turks',
3448 'uploader_id': 'TheYoungTurks',
3449 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3450 'upload_date': '20150715',
3451 'license': 'Standard YouTube License',
3452 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3453 'categories': ['News & Politics'],
3454 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3455 'like_count': int,
3456 'dislike_count': int,
3457 },
3458 'params': {
3459 'skip_download': True,
3460 },
3461 'only_matching': True,
3462 }, {
3463 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3464 'only_matching': True,
3465 }, {
3466 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3467 'only_matching': True,
09f1580e 3468 }, {
3469 'note': 'A channel that is not live. Should raise error',
3470 'url': 'https://www.youtube.com/user/numberphile/live',
3471 'only_matching': True,
3d3dddc9 3472 }, {
3473 'url': 'https://www.youtube.com/feed/trending',
3474 'only_matching': True,
3475 }, {
3d3dddc9 3476 'url': 'https://www.youtube.com/feed/library',
3477 'only_matching': True,
3478 }, {
3d3dddc9 3479 'url': 'https://www.youtube.com/feed/history',
3480 'only_matching': True,
3481 }, {
3d3dddc9 3482 'url': 'https://www.youtube.com/feed/subscriptions',
3483 'only_matching': True,
3484 }, {
3d3dddc9 3485 'url': 'https://www.youtube.com/feed/watch_later',
3486 'only_matching': True,
3487 }, {
da692b79 3488 'note': 'Recommended - redirects to home page',
3d3dddc9 3489 'url': 'https://www.youtube.com/feed/recommended',
3490 'only_matching': True,
29f7c58a 3491 }, {
da692b79 3492 'note': 'inline playlist with not always working continuations',
29f7c58a 3493 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3494 'only_matching': True,
3495 }, {
3496 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3497 'only_matching': True,
3498 }, {
3499 'url': 'https://www.youtube.com/course',
3500 'only_matching': True,
3501 }, {
3502 'url': 'https://www.youtube.com/zsecurity',
3503 'only_matching': True,
3504 }, {
3505 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3506 'only_matching': True,
3507 }, {
3508 'url': 'https://www.youtube.com/TheYoungTurks/live',
3509 'only_matching': True,
39ed931e 3510 }, {
3511 'url': 'https://www.youtube.com/hashtag/cctv9',
3512 'info_dict': {
3513 'id': 'cctv9',
3514 'title': '#cctv9',
3515 },
3516 'playlist_mincount': 350,
201c1459 3517 }, {
3518 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3519 'only_matching': True,
9297939e 3520 }, {
da692b79 3521 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3522 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3523 'only_matching': True
fe03a6cd 3524 }, {
3525 'note': '/browse/ should redirect to /channel/',
3526 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3527 'only_matching': True
3528 }, {
3529 'note': 'VLPL, should redirect to playlist?list=PL...',
3530 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3531 'info_dict': {
3532 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3533 'uploader': 'NoCopyrightSounds',
3534 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3535 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3536 'title': 'NCS Releases',
3537 },
3538 'playlist_mincount': 166,
18db7548 3539 }, {
3540 'note': 'Topic, should redirect to playlist?list=UU...',
3541 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3542 'info_dict': {
3543 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3544 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3545 'title': 'Uploads from Royalty Free Music - Topic',
3546 'uploader': 'Royalty Free Music - Topic',
3547 },
3548 'expected_warnings': [
3549 'A channel/user page was given',
3550 'The URL does not have a videos tab',
3551 ],
3552 'playlist_mincount': 101,
3553 }, {
3554 'note': 'Topic without a UU playlist',
3555 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3556 'info_dict': {
3557 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3558 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3559 },
3560 'expected_warnings': [
3561 'A channel/user page was given',
3562 'The URL does not have a videos tab',
3563 'Falling back to channel URL',
3564 ],
3565 'playlist_mincount': 9,
abcdd12b 3566 }, {
3567 'note': 'Youtube music Album',
3568 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3569 'info_dict': {
3570 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3571 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3572 },
3573 'playlist_count': 50,
47193e02 3574 }, {
3575 'note': 'unlisted single video playlist',
3576 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3577 'info_dict': {
3578 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3579 'uploader': 'colethedj',
3580 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3581 'title': 'yt-dlp unlisted playlist test',
3582 'availability': 'unlisted'
3583 },
3584 'playlist_count': 1,
29f7c58a 3585 }]
3586
@classmethod
def suitable(cls, url):
    """Tell whether this extractor accepts ``url``.

    Any URL that ``YoutubeIE.suitable`` accepts is rejected here; for all
    other URLs the decision is delegated to the parent class.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3591
3592 def _extract_channel_id(self, webpage):
3593 channel_id = self._html_search_meta(
3594 'channelId', webpage, 'channel id', default=None)
3595 if channel_id:
3596 return channel_id
3597 channel_url = self._html_search_meta(
3598 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3599 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3600 'twitter:app:url:googleplay'), webpage, 'channel url')
3601 return self._search_regex(
3602 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3603 channel_url, 'channel id')
15f6397c 3604
8bdd16b4 3605 @staticmethod
cd7c66cf 3606 def _extract_basic_item_renderer(item):
3607 # Modified from _extract_grid_item_renderer
201c1459 3608 known_basic_renderers = (
3609 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3610 )
3611 for key, renderer in item.items():
201c1459 3612 if not isinstance(renderer, dict):
cd7c66cf 3613 continue
201c1459 3614 elif key in known_basic_renderers:
3615 return renderer
3616 elif key.startswith('grid') and key.endswith('Renderer'):
3617 return renderer
8bdd16b4 3618
8bdd16b4 3619 def _grid_entries(self, grid_renderer):
3620 for item in grid_renderer['items']:
3621 if not isinstance(item, dict):
39b62db1 3622 continue
cd7c66cf 3623 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3624 if not isinstance(renderer, dict):
3625 continue
052e1350 3626 title = self._get_text(renderer, 'title')
fe93e2c4 3627
8bdd16b4 3628 # playlist
3629 playlist_id = renderer.get('playlistId')
3630 if playlist_id:
3631 yield self.url_result(
3632 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3633 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3634 video_title=title)
201c1459 3635 continue
8bdd16b4 3636 # video
3637 video_id = renderer.get('videoId')
3638 if video_id:
3639 yield self._extract_video(renderer)
201c1459 3640 continue
8bdd16b4 3641 # channel
3642 channel_id = renderer.get('channelId')
3643 if channel_id:
8bdd16b4 3644 yield self.url_result(
3645 'https://www.youtube.com/channel/%s' % channel_id,
3646 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3647 continue
3648 # generic endpoint URL support
3649 ep_url = urljoin('https://www.youtube.com/', try_get(
3650 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3651 compat_str))
3652 if ep_url:
3653 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3654 if ie.suitable(ep_url):
3655 yield self.url_result(
3656 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3657 break
8bdd16b4 3658
3d3dddc9 3659 def _shelf_entries_from_content(self, shelf_renderer):
3660 content = shelf_renderer.get('content')
3661 if not isinstance(content, dict):
8bdd16b4 3662 return
cd7c66cf 3663 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3664 if renderer:
3665 # TODO: add support for nested playlists so each shelf is processed
3666 # as separate playlist
3667 # TODO: this includes only first N items
3668 for entry in self._grid_entries(renderer):
3669 yield entry
3670 renderer = content.get('horizontalListRenderer')
3671 if renderer:
3672 # TODO
3673 pass
8bdd16b4 3674
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for one shelf.

    When the shelf has an endpoint URL, a URL result for it is yielded
    first (titled with the shelf title). Afterwards the entries embedded
    in the shelf content are yielded as well.

    With ``skip_channels`` set, a shelf whose URL contains ``/channels?``
    yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are skipped by inspecting the URL itself;
        # checking endpoint.commandMetadata.webCommandMetadata.webPageType
        # == WEB_PAGE_TYPE_CHANNEL will not work
        links_to_channels = '/channels?' in shelf_url
        if skip_channels and links_to_channels:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 3691
8bdd16b4 3692 def _playlist_entries(self, video_list_renderer):
3693 for content in video_list_renderer['contents']:
3694 if not isinstance(content, dict):
3695 continue
3696 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3697 if not isinstance(renderer, dict):
3698 continue
3699 video_id = renderer.get('videoId')
3700 if not video_id:
3701 continue
3702 yield self._extract_video(renderer)
07aeced6 3703
def _rich_entries(self, rich_grid_renderer):
    """Yield the video nested under ``content.videoRenderer``, if any.

    Nothing is yielded when that renderer is missing, is not a dict or
    has no ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3711
8bdd16b4 3712 def _video_entry(self, video_renderer):
3713 video_id = video_renderer.get('videoId')
3714 if video_id:
3715 return self._extract_video(video_renderer)
dacb3a86 3716
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by one community post.

    In order: the attached video (if any), the attached playlist (if
    any), then every link in the post text that ``YoutubeIE`` accepts,
    except a link to the video that is already attached.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        # Do not repeat the video that was already yielded as attachment
        if linked_video_id != attached_video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 3752
8bdd16b4 3753 def _post_thread_continuation_entries(self, post_thread_continuation):
3754 contents = post_thread_continuation.get('contents')
3755 if not isinstance(contents, list):
3756 return
3757 for content in contents:
3758 renderer = content.get('backstagePostThreadRenderer')
3759 if not isinstance(renderer, dict):
3760 continue
3761 for entry in self._post_thread_entries(renderer):
3762 yield entry
07aeced6 3763
39ed931e 3764 r''' # unused
3765 def _rich_grid_entries(self, contents):
3766 for content in contents:
3767 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3768 if video_renderer:
3769 entry = self._video_entry(video_renderer)
3770 if entry:
3771 yield entry
3772 '''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of a tab, following continuations page by page.

    The entries found in ``tab['content']`` are yielded first. While a
    continuation is available, further pages are requested through
    ``_extract_response`` and their entries are yielded as well.

    ``item_id`` is only used to label the page requests;
    ``identity_token``, ``account_syncid`` and ``ytcfg`` are passed on to
    ``generate_api_headers`` (``ytcfg`` also to ``_extract_response``).
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Yields the entries below ``parent_renderer['contents']`` and
        # records the continuation it finds in ``continuation_list[0]``.
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # Not an item section: only rich items are handled here
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Renderer key -> callable producing its entries
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    # Only the first known renderer of this element is used
                    break

            # Fall back to a continuation attached to the item section
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # Last resort: a continuation attached to the parent renderer
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list so that extract_entries can hand the continuation
    # back to this function
    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    visitor_data = None

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response shape: entries below 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Rebinding the name gives extract_entries (which shares this
            # variable) a fresh slot to report into for this page
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: items appended through
        # 'onResponseReceivedActions' / 'onResponseReceivedEndpoints'.
        # Maps the key of the first item to (handler, key the handler
        # expects the item list under).
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # Only the first item decides which handler is used
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the whole item list so it looks like the renderer the
            # handler normally receives
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Neither response shape was recognised: stop paging
        break
9558dcec 3888
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose 'selected' flag is True.

    Each tab is looked up under 'tabRenderer' or 'expandableTabRenderer'.
    Raises ExtractorError when no tab is marked as selected.
    """
    renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    for candidate in renderers:
        if candidate.get('selected') is True:
            return candidate
    raise ExtractorError('Unable to find selected tab')
b82f815f 3897
@classmethod
def _extract_uploader(cls, data):
    """Collect uploader name, id and URL from the secondary sidebar renderer.

    Returns a dict containing only the fields that could be found; the dict
    is empty when the sidebar lists no video owner.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    found = {
        'uploader': owner.get('text'),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    # Drop the fields that could not be extracted
    return dict((key, value) for key, value in found.items() if value is not None)
8bdd16b4 3912
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a page that is organised in tabs.

    Metadata is read from the channel metadata renderer if present, else
    from the playlist metadata renderer; the entries come from the
    currently selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if not renderer:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
    else:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')

    playlist_id = title = description = None
    tags, thumbnails_list = [], []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        avatar_thumbnails = try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
        # Playlists carry their thumbnails in the primary sidebar instead of an avatar
        thumbnails_list = avatar_thumbnails or try_get(
            self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
            lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
            list) or []

    thumbnails = []
    for thumb in thumbnails_list:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        # Hashtag pages have no metadata renderer; use the header text
        title = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText']) or playlist_id
    title = (
        title
        + format_field(selected_tab, 'title', ' - %s')
        + format_field(selected_tab, 'expandedText', ' - %s'))

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror whatever uploader fields ended up in metadata
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    ytcfg = self.extract_ytcfg(item_id, webpage)
    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3987
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a Mix playlist, requesting further pages as needed.

    Iteration stops when a page has no videos, when a page adds nothing
    after the last video already yielded, when the first video shows up
    again, or when no playlist data is available for the next page.

    @param playlist: playlist renderer dict of the current page
    @param playlist_id: id of the Mix, sent with every 'next' request
    @param data: initial page data, used to extract the account sync id
    @param webpage: webpage contents, used for ytcfg and the identity token
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        # The previous response may not have contained a playlist at all
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # Fall back to an empty dict so the .get() calls below cannot fail
        # when the last item carries no watch endpoint
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 4023
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Return the result for a playlist found on a watch page.

    If the playlist endpoint points to a URL other than ``url``, extraction
    is delegated to that URL; otherwise the playlist is read as a Mix.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4041
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for dropdown_entry in dropdown_entries:
        selected = try_get(
            dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if selected:
            label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
            if label:
                badge_labels.add(label.lower())
                break

    is_private = is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'public':
            is_unlisted = is_private = False
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4073
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first non-empty ``info_renderer`` entry among the playlist sidebar items.

    Entries that are not of ``expected_type`` are ignored. Returns None when
    the sidebar is missing or holds no matching entry.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next((found for found in candidates if found), None)
4082
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.
    Returns None when the page has no primary sidebar renderer.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for item in menu_items:
        if not isinstance(item, dict):
            continue
        nav_renderer = item.get('menuNavigationItemRenderer')
        label = try_get(
            nav_renderer, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    ytcfg = self.extract_ytcfg(item_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    # Defaults are used when the button (or one of its fields) was not found
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4121
def _extract_webpage(self, url, item_id):
    """Download ``url`` and return ``(webpage, data)`` with usable initial data.

    The download is repeated up to 'extractor_retries' additional times
    while the initial data has neither 'contents' nor 'currentVideoEndpoint'.
    At least one attempt is always made.

    Raises ExtractorError if the data is still incomplete after the last
    attempt (alerts found in the incomplete data are reported first).
    """
    retries = self.get_param('extractor_retries', 3)
    last_error = 'Incomplete yt initial data received'
    count = 0
    while True:
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self.extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            return webpage, data
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
        count += 1
4143
@staticmethod
def _smuggle_data(entries, data):
    """Lazily yield ``entries``, smuggling ``data`` into each entry's URL when ``data`` is truthy."""
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
4150
def _real_extract(self, url):
    """Extract ``url`` and pass any smuggled data on to the resulting entries."""
    plain_url, smuggled = unsmuggle_url(url, {})
    if self.is_music_url(plain_url):
        smuggled['is_music_url'] = True
    result = self.__real_extract(plain_url, smuggled)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled)
    return result
4159
# Splits a URL into the part matched by _VALID_URL ('pre'), an optional '/tab'
# component (only tried when the 'channel_type' group matched) and the rest ('post')
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)

def __real_extract(self, url, smuggled_data):
    """Normalise ``url``, download the page and dispatch on what it contains.

    Returns the result of tab extraction, playlist extraction or a single
    video URL result. Raises ExtractorError when the page is none of those.
    """
    item_id = self._match_id(url)
    # Always request the page from www.youtube.com, whatever host was given
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match groups of the URL, with unmatched groups as '' instead of None
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Only switch over once the playlist page loaded without error alerts
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)
    # data may have been replaced above, so look the tabs up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Neither tabs nor a playlist: fall back to the single video, if there is one
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4276
c5e8d7af 4277
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids and URLs carrying a list= parameter, and hands
    # them over to YoutubeTabIE as a canonical /playlist URL
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a video id (v=)."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the /playlist URL, keeping the original query if there is one."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        target = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            target = smuggle_url(target, {'is_music_url': True})
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4362
4363
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a playlist id; rewritten to a
    # regular watch URL and delegated to YoutubeTabIE
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL (video + playlist) and delegate to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4402
4403
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" shorthand for the /user/<name> page
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's page URL."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4417
b05654f0 4418
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" style shorthands for the liked-videos playlist (list=LL)
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the liked-videos playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4436
4437
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` video results for ``query``, following continuations page by page."""
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        found = 0
        continuation = {}
        for page_num in itertools.count(1):
            request.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                return
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [section]})

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list) or []
                for item in items:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    found += 1
                    if found == n:
                        return

            if not continuation:
                return

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
75dff0ee 4505
c9ae7b95 4506
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as YoutubeSearchIE, with a different key and search params
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4512
c9ae7b95 4513
class YoutubeSearchURLIE(YoutubeSearchIE):
    # Handles /results?search_query=... (or q=...) URLs
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own _VALID_URL."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by the URL's query string ('search_query'/'q' and optional 'sp')."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # The 'sp' value, if any, is sent along as the search params
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4540
4541
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # The extractor name is derived from the feed name
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/<_FEED_NAME> URL."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4558
4559
class YoutubeWatchLaterIE(InfoExtractor):
    # ":ytwatchlater" shorthand for the watch-later playlist (list=WL)
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the watch-later playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4572
4573
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com home page and the ":ytrec" shorthands
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4589
1ed5b5c9 4590
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # ":ytsubs" style shorthands for the subscriptions feed
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4602
1ed5b5c9 4603
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # ":ythis" / ":ythistory" shorthands for the watch-history feed
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4612
4613
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Matches watch/attribution URLs that have no video id, which typically
    # happens when an unquoted "&" made the shell cut the URL short
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The example commands name this program (yt-dlp) so they can be copied as-is
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4661
4662
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is only 1 to 10 characters long.

    The extractor never returns a result; it reports the short ID and the
    offending URL through an expected error.
    """

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # The URL must end right after the (short) ID; it is captured as "id".
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short ID and URL.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)