]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
bugfix for 80c03fa98fdd54410bd36684ef453f6976a9c0bf
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
2d30521a 37 float_or_none,
11f9be09 38 format_field,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
641ad5d8 41 is_html,
94278f72 42 mimetype2ext,
9c0d7f49 43 network_exceptions,
11f9be09 44 orderedSet,
6310acf5 45 parse_codecs,
49bd8c66 46 parse_count,
7c80519c 47 parse_duration,
7ea65411 48 parse_iso8601,
4dfbf869 49 parse_qs,
dca3ff4a 50 qualities,
c0ac49bc 51 remove_end,
3995d37d 52 remove_start,
cf7e015f 53 smuggle_url,
dbdaaa23 54 str_or_none,
c93d53f5 55 str_to_int,
7c365c21 56 traverse_obj,
556dbe7f 57 try_get,
c5e8d7af
PH
58 unescapeHTML,
59 unified_strdate,
cf7e015f 60 unsmuggle_url,
8bdd16b4 61 update_url_query,
21c340b8 62 url_or_none,
fe93e2c4 63 urljoin,
7c365c21 64 variadic,
c5e8d7af
PH
65)
66
5f6a1245 67
# Built-in Innertube client configurations, keyed by client name.
# Each entry carries the request context ('INNERTUBE_CONTEXT'), the numeric
# 'INNERTUBE_CONTEXT_CLIENT_NAME' and, optionally, an API key, a host and a
# 'REQUIRE_JS_PLAYER' flag. Keys that are omitted here (API key, host,
# REQUIRE_JS_PLAYER, 'hl') are filled in by build_innertube_clients().
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    # The *_music clients talk to a different host than the default one
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    # All android/ios clients below explicitly set REQUIRE_JS_PLAYER to False
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    # No API key given here; build_innertube_clients() supplies the default one
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No API key given here; build_innertube_clients() supplies the default one
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
214
215
def build_innertube_clients():
    """
    Complete every entry of INNERTUBE_CLIENTS in place.

    Fills in the default API key, host, REQUIRE_JS_PLAYER flag and 'hl'
    language where a client does not define them, assigns a 'priority'
    derived from the client's base name (the part before the first '_'),
    and registers an extra '<name>_agegate' variant for each base client.
    """
    embed_context = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    primary_names = ('android', 'web', 'ios', 'mweb')
    # presumably ranks a name by its position in the reversed tuple - see utils.qualities
    rank_of = qualities(primary_names[::-1])

    fallbacks = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: agegate variants are added to the dict below
    for name, config in tuple(INNERTUBE_CLIENTS.items()):
        for key, fallback in fallbacks:
            config.setdefault(key, fallback)
        config['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        family = name.split('_', 1)[0]
        config['priority'] = 10 * rank_of(family)

        if name in primary_names:
            # The base client keeps its priority; its agegate copy ranks one lower
            variant = copy.deepcopy(config)
            variant['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            variant['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            variant['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = variant
            continue

        if name.endswith('_embedded'):
            config['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            config['priority'] -= 2
        else:
            config['priority'] -= 3
240
241
242build_innertube_clients()
243
244
de7f3446 245class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 246 """Provide base functions for Youtube extractors"""
e00eb564 247
# Regex alternation of first path components that have a fixed meaning
_RESERVED_NAMES = (
    r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
    r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
    r'browse|oembed|get_video_info|iframe_api|s/player|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

# Regex for playlist IDs: a known prefix followed by at least 10 ID characters,
# or one of the short fixed IDs (RDMM, WL, LL, LM)
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

_NETRC_MACHINE = 'youtube'

# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
260
3619f78d 261 r''' # Unused since login is broken
262 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
263 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
264
265 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
266 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
267 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
268 '''
d0ba5587 269
def _login(self):
    """
    Check the login prerequisites for YouTube.

    Username/password login is broken, so no login is actually performed:
    cookies are the only supported authentication method.

    If _LOGIN_REQUIRED is set and neither the 'cookiefile' nor the
    'cookiesfrombrowser' parameter is configured, self.raise_login_required()
    is called. If a username is configured, a warning explains that
    password login is broken.

    Always returns None.
    """
    # username+password login is broken
    if (self._LOGIN_REQUIRED
            and self.get_param('cookiefile') is None
            and self.get_param('cookiesfrombrowser') is None):
        self.raise_login_required(
            'Login details are needed to download this content', method='cookies')

    # The password is irrelevant: only the presence of a username matters
    username, _ = self._get_login_info()
    if username:
        self.report_warning(
            'Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
    return
b2e8bc1b 468
def _initialize_consent(self):
    """
    Set an accepted 'CONSENT' cookie for .youtube.com when needed.

    Nothing is done if a '__Secure-3PSID' cookie exists or the current
    CONSENT cookie already contains 'YES'. Otherwise the numeric id of a
    'PENDING+<id>' consent value is reused, or a random 3-digit id is used.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        cookie_value = consent_cookie.value
        if 'YES' in cookie_value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', cookie_value, 'consent', default=None)

    suffix = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % suffix)
8d81f3e3 483
def _real_initialize(self):
    """Prepare the consent cookie and, if a downloader is attached, run the login check."""
    self._initialize_consent()
    if self._downloader is not None:
        # The result of _login() is not used: nothing follows either outcome
        self._login()
c5e8d7af 490
# Captures the JSON object assigned to ytInitialData (either as a plain name
# or as window["ytInitialData"]); the match is non-greedy up to a ';'
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
# Captures the JSON object assigned to ytInitialPlayerResponse
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text expected right after the assignment; appended to the regexes above
# to stop the non-greedy match at the real end of the object
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 494
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in configuration for `client` (KeyError if unknown)."""
    builtin_config = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_config)
109dd3b2 497
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' of the built-in configuration for `client`."""
    builtin_config = INNERTUBE_CLIENTS[client]
    return builtin_config['INNERTUBE_HOST']
109dd3b2 500
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get() on `ytcfg`, falling back to the built-in config of
    `default_client` whenever the first lookup yields a falsy value.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
505
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from `ytcfg`, or from the default client config as fallback."""
    name_getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, compat_str, default_client)
109dd3b2 510
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from `ytcfg`, or from the default client config as fallback."""
    version_getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, compat_str, default_client)
109dd3b2 515
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from `ytcfg`, or from the default client config as fallback."""
    return self._ytcfg_get_safe(
        ytcfg, lambda cfg: cfg['INNERTUBE_API_KEY'],
        expected_type=compat_str, default_client=default_client)
518
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the Innertube request context.

    The 'INNERTUBE_CONTEXT' dict of `ytcfg` is returned as-is when present.
    Otherwise a copy of the default client's context is used; if a `ytcfg`
    was given, its client name/version and 'VISITOR_DATA' are applied to it.
    """
    def context_of(config):
        return try_get(config, lambda cfg: cfg['INNERTUBE_CONTEXT'], dict)

    own_context = context_of(ytcfg)
    if own_context:
        return own_context

    fallback = context_of(self._get_default_ytcfg(default_client))
    if ytcfg:
        # Recreate the client context (required)
        client_info = fallback['client']
        client_info['clientVersion'] = self._extract_client_version(ytcfg, default_client)
        client_info['clientName'] = self._extract_client_name(ytcfg, default_client)
        visitor = try_get(ytcfg, lambda cfg: cfg['VISITOR_DATA'], compat_str)
        if visitor:
            client_info['visitorData'] = visitor
    return fallback
538
cf87314d 539 _SAPISID = None
540
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value for `origin`.

    The SAPISID value is looked up in the youtube.com cookies on the first
    call and stored on self._SAPISID (False when no cookie is available).
    Returns None when there is no SAPISID value.
    """
    timestamp = round(time.time())
    if self._SAPISID is None:
        jar = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        cookie = dict_get(jar, ('__Secure-3PAPISID', 'SAPISID'))
        if not (cookie and cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not jar.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID,
                    secure=True, expire_time=timestamp + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = '%s %s %s' % (timestamp, self._SAPISID, origin)
    digest = hashlib.sha1(payload.encode('utf-8')).hexdigest()
    return 'SAPISIDHASH %s_%s' % (timestamp, digest)
a5c56234
M
565
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` to the Innertube endpoint `ep` and return the parsed JSON.

    @param ep              endpoint name appended to '/youtubei/v1/'
    @param query           dict merged into the request body next to 'context'
    @param video_id        id passed through to _download_json
    @param headers         extra headers; they override the generated ones
    @param context         request context; defaults to the context of `default_client`
    @param api_key         API key; defaults to the key of `default_client`
    @param api_hostname    host to call; defaults to the host of `default_client`
    @param default_client  client whose built-in config supplies the defaults
    fatal, note and errnote are passed through to _download_json.
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)

    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)

    hostname = api_hostname or self._get_innertube_host(default_client)
    # The key must come from the same client as the context, headers and host
    key = api_key or self._extract_api_key(default_client=default_client)
    return self._download_json(
        'https://%s/youtubei/v1/%s' % (hostname, ep),
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        query={'key': key})
581
def extract_yt_initial_data(self, video_id, webpage):
    """Find the ytInitialData assignment in `webpage` and return it parsed as JSON."""
    # Prefer the pattern anchored by a trailing boundary; fall back to the bare one
    bounded_re = r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_re, self._YT_INITIAL_DATA_RE), webpage, 'yt initial data')
    return self._parse_json(raw_json, video_id)
0c148415 588
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among `data` that converts to an int,
    or None if there is none.
    """
    candidates = (
        int_or_none(try_get(config, lambda cfg: cfg['SESSION_INDEX']))
        for config in data)
    return next((index for index in candidates if index is not None), None)
599
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return 'ID_TOKEN' from `ytcfg`, else search for it in `webpage`; None if not found."""
    token = try_get(ytcfg, lambda cfg: cfg['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
610
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the first channel sync id found in `args`, or None
    """
    datasync_getters = (
        lambda src: src['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda src: src['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda src: src['DELEGATED_SESSION_ID'], compat_str)
        if delegated:
            return delegated

        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) > 1 and parts[1]:
            return parts[0]
    return None
a1c5d2ca 629
@property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
633
def extract_ytcfg(self, video_id, webpage):
    """Return the object passed to ytcfg.set(...) in `webpage` as a dict ({} if unavailable)."""
    if not webpage:
        return {}
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_config, video_id, fatal=False)
    return parsed or {}
641
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an Innertube API request.

    Explicit arguments take precedence; missing values are extracted from
    `ytcfg` or the built-in config of `default_client`. Headers whose value
    is None are left out of the returned dict.
    """
    host = api_hostname or self._get_innertube_host(default_client)
    origin = 'https://' + host

    client_name = self._ytcfg_get_safe(
        ytcfg, lambda cfg: cfg['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or try_get(
            self._extract_context(ytcfg, default_client),
            lambda ctx: ctx['client']['visitorData'], compat_str),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # A sync id without a known session index falls back to the first account
        headers['X-Goog-AuthUser'] = 0

    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        headers['Authorization'] = authorization
        headers['X-Origin'] = origin

    return {name: value for name, value in headers.items() if value is not None}
29f7c58a 667
2d6659b9 668 @staticmethod
669 def _build_api_continuation_query(continuation, ctp=None):
670 query = {
671 'continuation': continuation
672 }
673 # TODO: Inconsistency with clickTrackingParams.
674 # Currently we have a fixed ctp contained within context (from ytcfg)
675 # and a ctp in root query for continuation.
676 if ctp:
677 query['clickTracking'] = {'clickTrackingParams': ctp}
678 return query
679
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's 'nextContinuationData'
    or 'reloadContinuationData'; returns None when no token is present.
    """
    data_getters = (
        lambda r: r['continuations'][0]['nextContinuationData'],
        lambda r: r['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, data_getters, dict) or {}
    token = continuation_data.get('continuation')
    if token:
        return cls._build_api_continuation_query(
            token, continuation_data.get('clickTrackingParams'))
    return None
2d6659b9 692
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.
    Returns None if the argument is not a dict or carries no token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda ep: ep['continuationCommand']['token'], compat_str)
    if token:
        return cls._build_api_continuation_query(
            token, continuation_ep.get('clickTrackingParams'))
    return None
2d6659b9 702
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`, or None.

    The renderer's own continuation data is tried first, then the
    'continuationItemRenderer' entries of its 'contents' and 'items' lists.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    # Collect the dict entries of both lists, 'contents' before 'items'
    entries = [
        entry
        for key in ('contents', 'items')
        for entry in (try_get(renderer, lambda r, k=key: r[k], list) or [])
        if isinstance(entry, dict)]

    endpoint_getters = (
        lambda e: e['continuationItemRenderer']['continuationEndpoint'],
        lambda e: e['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        endpoint = try_get(entry, endpoint_getters, dict)
        found = cls._extract_continuation_ep_data(endpoint)
        if found:
            return found
    return None
723
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) pairs for the entries of data['alerts'].

    Entries that are not dicts, alerts that are not dicts, and alerts
    without a 'type' or without any text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Malformed (non-dict) alerts must not abort the whole extraction
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
736
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report (alert_type, message) pairs coming from YouTube.

    With `fatal` set, alerts of type 'error' (case-insensitive) are errors:
    the last one is raised as ExtractorError, all others are reported as
    warnings. Every remaining alert is reported as a warning.
    """
    fatal_alerts, soft_alerts = [], []
    for kind, text in alerts:
        bucket = fatal_alerts if (kind.lower() == 'error' and fatal) else soft_alerts
        bucket.append((kind, text))

    # Only the final error is raised; earlier ones are demoted to warnings
    for kind, text in soft_alerts + fatal_alerts[:-1]:
        self.report_warning('YouTube said: %s - %s' % (kind, text), only_once=only_once)

    if fatal_alerts:
        final_message = fatal_alerts[-1][1]
        raise ExtractorError('YouTube said: %s' % final_message, expected=expected)
750
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and report them; extra arguments go to _report_alerts."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
753
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased 'metadataBadgeRenderer' labels in renderer['badges']."""
    badge_items = try_get(renderer, lambda r: r['badges'], list) or []
    labels = (
        try_get(item, lambda b: b['metadataBadgeRenderer']['label'], compat_str)
        for item in badge_items)
    return {label.lower() for label in labels if label}
761
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data`, or None.

    Each path in `path_list` is tried in order (with no paths, `data`
    itself is inspected). For every candidate object either its
    'simpleText' string or the concatenated 'text' of its 'runs' is used.

    @param max_runs  if truthy, only the first `max_runs` runs are joined
    """
    for path in path_list or [None]:
        if path is None:
            obj = [data]
        else:
            obj = traverse_obj(data, path, default=[])
            # Wrap the result in a list unless the path contains `...` or a
            # list/tuple key - presumably the cases where traverse_obj
            # already returns a list of matches (verify against utils.traverse_obj)
            if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                obj = [obj]
        for item in obj:
            text = try_get(item, lambda x: x['simpleText'], compat_str)
            if text:
                return text
            runs = try_get(item, lambda x: x['runs'], list) or []
            # A bare list is treated as the runs themselves
            if not runs and isinstance(item, list):
                runs = item

            # Keep at most max_runs runs (all of them when max_runs is falsy)
            runs = runs[:min(len(runs), max_runs or len(runs))]
            text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if text:
                return text
47193e02 783
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the Innertube API with retries and return the response.

    The call is repeated up to 'extractor_retries' (default 3) extra times
    on network errors (except HTTP 403/429), on 'unknown error' alerts and
    when none of `check_get_keys` is present in the response.

    @param check_get_keys  keys of which at least one must be found (via
                           dict_get) for the response to count as complete
    @param fatal           if False, failures are reported as warnings and
                           None is returned instead of raising
    """
    response = None
    last_error = None
    # Starts at -1 so that the first attempt is number 0
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            # Always fatal here: errors are handled (and possibly retried) below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # A non-HTML error body may carry a JSON error message from YouTube
                if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                    e.cause.seek(0)
                    yt_error = try_get(
                        self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                        lambda x: x['error']['message'], compat_str)
                    if yt_error:
                        self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                # NOTE(review): this `continue` does not check `count < retries`; on the
                # last attempt the loop ends and the response is returned as-is
                if 'unknown error' in e.msg.lower():
                    last_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
854
9297939e 855 @staticmethod
856 def is_music_url(url):
857 return re.match(r'https?://music\.youtube\.com/', url) is not None
858
def _extract_video(self, renderer):
    """Convert a video renderer dict into a ``url``-type result for YoutubeIE.

    Reads the video id plus the title, description snippet, length,
    view-count and uploader texts from *renderer* and returns them as an
    info dict whose ``url`` is the watch page of that video id.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Length text is looked up under 'lengthText' and under the thumbnail
    # time-status overlay path, then parsed into a duration
    length_text = self._get_text(
        renderer, 'lengthText',
        ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)

    # Drop every whitespace character, then keep only the leading run of
    # digits and commas as the view count
    raw_view_text = self._get_text(renderer, 'viewCountText') or ''
    compact_view_text = re.sub(r'\s', '', raw_view_text)
    view_digits = self._search_regex(
        r'^([\d,]+)', compact_view_text, 'view count', default=None)
    view_count = str_to_int(view_digits)

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

    result = {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
    }
    result.update({
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    })
    return result
883
0c148415 884
360e1ca5 885class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 886 IE_DESC = 'YouTube.com'
bc2ca1bb 887 _INVIDIOUS_SITES = (
888 # invidious-redirect websites
889 r'(?:www\.)?redirect\.invidious\.io',
890 r'(?:(?:www|dev)\.)?invidio\.us',
891 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
892 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 893 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 894 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 895 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 896 # youtube-dl invidious instances list
897 r'(?:(?:www|no)\.)?invidiou\.sh',
898 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
899 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 900 r'(?:www\.)?invidious\.mastodon\.host',
901 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 902 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 903 r'(?:www\.)?invidious\.tinfoil-hat\.net',
904 r'(?:www\.)?invidious\.himiko\.cloud',
905 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 906 r'(?:www\.)?invidious\.tube',
907 r'(?:www\.)?invidiou\.site',
908 r'(?:www\.)?invidious\.site',
909 r'(?:www\.)?invidious\.xyz',
910 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 911 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 912 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 913 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 914 r'(?:www\.)?tube\.poal\.co',
915 r'(?:www\.)?tube\.connect\.cafe',
916 r'(?:www\.)?vid\.wxzm\.sx',
917 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 918 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 919 r'(?:www\.)?yewtu\.be',
920 r'(?:www\.)?yt\.elukerio\.org',
921 r'(?:www\.)?yt\.lelux\.fi',
922 r'(?:www\.)?invidious\.ggc-project\.de',
923 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 924 r'(?:www\.)?ytprivate\.com',
925 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 926 r'(?:www\.)?invidious\.toot\.koeln',
927 r'(?:www\.)?invidious\.fdn\.fr',
928 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 929 r'(?:www\.)?invidious\.namazso\.eu',
930 r'(?:www\.)?invidious\.silkky\.cloud',
931 r'(?:www\.)?invidious\.exonip\.de',
932 r'(?:www\.)?invidious\.riverside\.rocks',
933 r'(?:www\.)?invidious\.blamefran\.net',
934 r'(?:www\.)?invidious\.moomoo\.de',
935 r'(?:www\.)?ytb\.trom\.tf',
936 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 937 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
938 r'(?:www\.)?qklhadlycap4cnod\.onion',
939 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
940 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
941 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
942 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
943 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
944 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 945 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
946 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
947 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
948 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 949 )
cb7dfeea 950 _VALID_URL = r"""(?x)^
c5e8d7af 951 (
edb53e2d 952 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 953 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
954 (?:www\.)?deturl\.com/www\.youtube\.com|
955 (?:www\.)?pwnyoutube\.com|
956 (?:www\.)?hooktube\.com|
957 (?:www\.)?yourepeat\.com|
958 tube\.majestyc\.net|
959 %(invidious)s|
960 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
961 (?:.*?\#/)? # handle anchor (#/) redirect urls
962 (?: # the various things that can precede the ID:
8fc54b12 963 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
c5e8d7af 964 |(?: # or the v= param in all its forms
f7000f3a 965 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 966 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 967 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
968 v=
969 )
f4b05232 970 ))
cbaed4bb
S
971 |(?:
972 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
973 vid\.plus| # or vid.plus/xxxx
974 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 975 %(invidious)s
cbaed4bb 976 )/
edb53e2d 977 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 978 )
c5e8d7af 979 )? # all until now is optional -> you can pass the naked ID
201c1459 980 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 981 (?(1).+)? # if we found the ID, everything can follow
9297939e 982 (?:\#|$)""" % {
bc2ca1bb 983 'invidious': '|'.join(_INVIDIOUS_SITES),
984 }
e40c758c 985 _PLAYER_INFO_RE = (
cc2db878 986 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
987 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 988 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 989 )
2c62dc26 990 _formats = {
c2d3cb4c 991 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
992 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
993 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
994 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
995 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
996 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
997 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
998 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 999 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 1000 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1001 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1002 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1003 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1004 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1005 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 1006 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 1007 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1008 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 1009
1010
1011 # 3D videos
c2d3cb4c 1012 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1013 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1014 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1015 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 1016 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1017 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1018 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 1019
96fb5605 1020 # Apple HTTP Live Streaming
11f12195 1021 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 1022 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1023 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1024 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1025 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1026 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 1027 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1028 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
1029
1030 # DASH mp4 video
d23028a8
S
1031 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1032 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1033 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1034 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1035 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 1036 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
1037 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1038 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1039 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1040 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1041 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1042 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1043
f6f1fc92 1044 # Dash mp4 audio
d23028a8
S
1045 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1046 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1047 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1048 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1049 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1050 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1051 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1052
1053 # Dash webm
d23028a8
S
1054 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1055 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1056 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1057 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1058 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1059 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1060 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1061 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1062 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1063 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1064 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1065 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1066 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1067 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1068 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1069 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1070 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1071 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1072 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1073 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1074 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1075 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1076
1077 # Dash webm audio
d23028a8
S
1078 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1079 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1080
0857baad 1081 # Dash webm audio with opus inside
d23028a8
S
1082 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1083 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1084 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1085
ce6b9a2d
PH
1086 # RTMP (unnamed)
1087 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1088
1089 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1090 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1091 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1092 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1093 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1094 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1095 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1096 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1097 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1098 }
29f7c58a 1099 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1100
fd5c4aab
S
1101 _GEO_BYPASS = False
1102
78caa52a 1103 IE_NAME = 'youtube'
2eb88d95
PH
1104 _TESTS = [
1105 {
2d3d2997 1106 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1107 'info_dict': {
1108 'id': 'BaW_jenozKc',
1109 'ext': 'mp4',
3867038a 1110 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1111 'uploader': 'Philipp Hagemeister',
1112 'uploader_id': 'phihag',
ec85ded8 1113 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
1114 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1115 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1116 'upload_date': '20121002',
3867038a 1117 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 1118 'categories': ['Science & Technology'],
3867038a 1119 'tags': ['youtube-dl'],
556dbe7f 1120 'duration': 10,
dbdaaa23 1121 'view_count': int,
3e7c1224
PH
1122 'like_count': int,
1123 'dislike_count': int,
7c80519c 1124 'start_time': 1,
297a564b 1125 'end_time': 9,
2eb88d95 1126 }
0e853ca4 1127 },
fccd3771 1128 {
4bc3a23e
PH
1129 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1130 'note': 'Embed-only video (#1746)',
1131 'info_dict': {
1132 'id': 'yZIXLfi8CZQ',
1133 'ext': 'mp4',
1134 'upload_date': '20120608',
1135 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1136 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1137 'uploader': 'SET India',
94bfcd23 1138 'uploader_id': 'setindia',
ec85ded8 1139 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1140 'age_limit': 18,
545cc85d 1141 },
1142 'skip': 'Private video',
fccd3771 1143 },
11b56058 1144 {
8bdd16b4 1145 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1146 'note': 'Use the first video ID in the URL',
1147 'info_dict': {
1148 'id': 'BaW_jenozKc',
1149 'ext': 'mp4',
3867038a 1150 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1151 'uploader': 'Philipp Hagemeister',
1152 'uploader_id': 'phihag',
ec85ded8 1153 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1154 'upload_date': '20121002',
3867038a 1155 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1156 'categories': ['Science & Technology'],
3867038a 1157 'tags': ['youtube-dl'],
556dbe7f 1158 'duration': 10,
dbdaaa23 1159 'view_count': int,
11b56058
PM
1160 'like_count': int,
1161 'dislike_count': int,
34a7de29
S
1162 },
1163 'params': {
1164 'skip_download': True,
1165 },
11b56058 1166 },
dd27fd17 1167 {
2d3d2997 1168 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1169 'note': '256k DASH audio (format 141) via DASH manifest',
1170 'info_dict': {
1171 'id': 'a9LDPn-MO4I',
1172 'ext': 'm4a',
1173 'upload_date': '20121002',
1174 'uploader_id': '8KVIDEO',
ec85ded8 1175 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1176 'description': '',
1177 'uploader': '8KVIDEO',
1178 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1179 },
4bc3a23e
PH
1180 'params': {
1181 'youtube_include_dash_manifest': True,
1182 'format': '141',
4919603f 1183 },
de3c7fe0 1184 'skip': 'format 141 not served anymore',
dd27fd17 1185 },
8bdd16b4 1186 # DASH manifest with encrypted signature
1187 {
1188 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1189 'info_dict': {
1190 'id': 'IB3lcPjvWLA',
1191 'ext': 'm4a',
1192 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1193 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1194 'duration': 244,
1195 'uploader': 'AfrojackVEVO',
1196 'uploader_id': 'AfrojackVEVO',
1197 'upload_date': '20131011',
cc2db878 1198 'abr': 129.495,
8bdd16b4 1199 },
1200 'params': {
1201 'youtube_include_dash_manifest': True,
1202 'format': '141/bestaudio[ext=m4a]',
1203 },
1204 },
65c2fde2 1205 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1206 {
65c2fde2 1207 'note': 'Embed allowed age-gate video',
2d3d2997 1208 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1209 'info_dict': {
1210 'id': 'HtVdAasjOgU',
1211 'ext': 'mp4',
1212 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1213 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1214 'duration': 142,
c522adb1
JMF
1215 'uploader': 'The Witcher',
1216 'uploader_id': 'WitcherGame',
ec85ded8 1217 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1218 'upload_date': '20140605',
34952f09 1219 'age_limit': 18,
c522adb1
JMF
1220 },
1221 },
65c2fde2 1222 {
1223 'note': 'Age-gate video with embed allowed in public site',
1224 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1225 'info_dict': {
1226 'id': 'HsUATh_Nc2U',
1227 'ext': 'mp4',
1228 'title': 'Godzilla 2 (Official Video)',
1229 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1230 'upload_date': '20200408',
1231 'uploader_id': 'FlyingKitty900',
1232 'uploader': 'FlyingKitty',
1233 'age_limit': 18,
1234 },
1235 },
1236 {
1237 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1238 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1239 'info_dict': {
1240 'id': 'Tq92D6wQ1mg',
1241 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1242 'ext': 'mp4',
1243 'upload_date': '20191227',
65c2fde2 1244 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1245 'uploader': 'Projekt Melody',
1246 'description': 'md5:17eccca93a786d51bc67646756894066',
1247 'age_limit': 18,
1248 },
1249 },
1250 {
1251 'note': 'Non-Agegated non-embeddable video',
1252 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1253 'info_dict': {
1254 'id': 'MeJVWBSsPAY',
1255 'ext': 'mp4',
1256 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1257 'uploader': 'Herr Lurik',
1258 'uploader_id': 'st3in234',
1259 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1260 'upload_date': '20130730',
1261 },
1262 },
1263 {
1264 'note': 'Non-bypassable age-gated video',
1265 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1266 'only_matching': True,
1267 },
8bdd16b4 1268 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1269 # YouTube Red ad is not captured for creator
1270 {
1271 'url': '__2ABJjxzNo',
1272 'info_dict': {
1273 'id': '__2ABJjxzNo',
1274 'ext': 'mp4',
1275 'duration': 266,
1276 'upload_date': '20100430',
1277 'uploader_id': 'deadmau5',
1278 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1279 'creator': 'deadmau5',
1280 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1281 'uploader': 'deadmau5',
1282 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1283 'alt_title': 'Some Chords',
8bdd16b4 1284 },
1285 'expected_warnings': [
1286 'DASH manifest missing',
1287 ]
1288 },
067aa17e 1289 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1290 {
1291 'url': 'lqQg6PlCWgI',
1292 'info_dict': {
1293 'id': 'lqQg6PlCWgI',
1294 'ext': 'mp4',
556dbe7f 1295 'duration': 6085,
90227264 1296 'upload_date': '20150827',
cbe2bd91 1297 'uploader_id': 'olympic',
ec85ded8 1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1299 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1300 'uploader': 'Olympics',
cbe2bd91
PH
1301 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1302 },
1303 'params': {
1304 'skip_download': 'requires avconv',
e52a40ab 1305 }
cbe2bd91 1306 },
6271f1ca
PH
1307 # Non-square pixels
1308 {
1309 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1310 'info_dict': {
1311 'id': '_b-2C3KPAM0',
1312 'ext': 'mp4',
1313 'stretched_ratio': 16 / 9.,
556dbe7f 1314 'duration': 85,
6271f1ca
PH
1315 'upload_date': '20110310',
1316 'uploader_id': 'AllenMeow',
ec85ded8 1317 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1318 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1319 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1320 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1321 },
06b491eb
S
1322 },
1323 # url_encoded_fmt_stream_map is empty string
1324 {
1325 'url': 'qEJwOuvDf7I',
1326 'info_dict': {
1327 'id': 'qEJwOuvDf7I',
f57b7835 1328 'ext': 'webm',
06b491eb
S
1329 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1330 'description': '',
1331 'upload_date': '20150404',
1332 'uploader_id': 'spbelect',
1333 'uploader': 'Наблюдатели Петербурга',
1334 },
1335 'params': {
1336 'skip_download': 'requires avconv',
e323cf3f
S
1337 },
1338 'skip': 'This live event has ended.',
06b491eb 1339 },
067aa17e 1340 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1341 {
1342 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1343 'info_dict': {
1344 'id': 'FIl7x6_3R5Y',
eb6793ba 1345 'ext': 'webm',
da77d856
S
1346 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1347 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1348 'duration': 220,
da77d856
S
1349 'upload_date': '20150625',
1350 'uploader_id': 'dorappi2000',
ec85ded8 1351 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1352 'uploader': 'dorappi2000',
eb6793ba 1353 'formats': 'mincount:31',
da77d856 1354 },
eb6793ba 1355 'skip': 'not actual anymore',
2ee8f5d8 1356 },
8a1a26ce
YCH
1357 # DASH manifest with segment_list
1358 {
1359 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1360 'md5': '8ce563a1d667b599d21064e982ab9e31',
1361 'info_dict': {
1362 'id': 'CsmdDsKjzN8',
1363 'ext': 'mp4',
17ee98e1 1364 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1365 'uploader': 'Airtek',
1366 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1367 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1368 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1369 },
1370 'params': {
1371 'youtube_include_dash_manifest': True,
1372 'format': '135', # bestvideo
be49068d
S
1373 },
1374 'skip': 'This live event has ended.',
2ee8f5d8 1375 },
cf7e015f
S
1376 {
1377 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1378 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1379 'info_dict': {
545cc85d 1380 'id': 'jvGDaLqkpTg',
1381 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1382 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1383 },
1384 'playlist': [{
1385 'info_dict': {
545cc85d 1386 'id': 'jvGDaLqkpTg',
cf7e015f 1387 'ext': 'mp4',
545cc85d 1388 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1389 'description': 'md5:e03b909557865076822aa169218d6a5d',
1390 'duration': 10643,
1391 'upload_date': '20161111',
1392 'uploader': 'Team PGP',
1393 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1394 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1395 },
1396 }, {
1397 'info_dict': {
545cc85d 1398 'id': '3AKt1R1aDnw',
cf7e015f 1399 'ext': 'mp4',
545cc85d 1400 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1401 'description': 'md5:e03b909557865076822aa169218d6a5d',
1402 'duration': 10991,
1403 'upload_date': '20161111',
1404 'uploader': 'Team PGP',
1405 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1406 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1407 },
1408 }, {
1409 'info_dict': {
545cc85d 1410 'id': 'RtAMM00gpVc',
cf7e015f 1411 'ext': 'mp4',
545cc85d 1412 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1413 'description': 'md5:e03b909557865076822aa169218d6a5d',
1414 'duration': 10995,
1415 'upload_date': '20161111',
1416 'uploader': 'Team PGP',
1417 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1418 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1419 },
1420 }, {
1421 'info_dict': {
545cc85d 1422 'id': '6N2fdlP3C5U',
cf7e015f 1423 'ext': 'mp4',
545cc85d 1424 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1425 'description': 'md5:e03b909557865076822aa169218d6a5d',
1426 'duration': 10990,
1427 'upload_date': '20161111',
1428 'uploader': 'Team PGP',
1429 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1430 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1431 },
1432 }],
1433 'params': {
1434 'skip_download': True,
1435 },
65c2fde2 1436 'skip': 'Not multifeed anymore',
cbaed4bb 1437 },
f9f49d87 1438 {
067aa17e 1439 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1440 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1441 'info_dict': {
1442 'id': 'gVfLd0zydlo',
1443 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1444 },
1445 'playlist_count': 2,
be49068d 1446 'skip': 'Not multifeed anymore',
f9f49d87 1447 },
cbaed4bb 1448 {
2d3d2997 1449 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1450 'only_matching': True,
0e49d9a6 1451 },
6d4fc66b 1452 {
2d3d2997 1453 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1454 'only_matching': True,
1455 },
0e49d9a6 1456 {
067aa17e 1457 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1458 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1459 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1460 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1461 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1462 'info_dict': {
1463 'id': 'lsguqyKfVQg',
1464 'ext': 'mp4',
1465 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1466 'alt_title': 'Dark Walk',
0e49d9a6 1467 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1468 'duration': 133,
0e49d9a6
LL
1469 'upload_date': '20151119',
1470 'uploader_id': 'IronSoulElf',
ec85ded8 1471 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1472 'uploader': 'IronSoulElf',
11f9be09 1473 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1474 'track': 'Dark Walk',
1475 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1476 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1477 },
1478 'params': {
1479 'skip_download': True,
1480 },
1481 },
61f92af1 1482 {
067aa17e 1483 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1484 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1485 'only_matching': True,
1486 },
313dfc45
LL
1487 {
1488 # Video with yt:stretch=17:0
1489 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1490 'info_dict': {
1491 'id': 'Q39EVAstoRM',
1492 'ext': 'mp4',
1493 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1494 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1495 'upload_date': '20151107',
1496 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1497 'uploader': 'CH GAMER DROID',
1498 },
1499 'params': {
1500 'skip_download': True,
1501 },
be49068d 1502 'skip': 'This video does not exist.',
313dfc45 1503 },
201c1459 1504 {
1505 # Video with incomplete 'yt:stretch=16:'
1506 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1507 'only_matching': True,
1508 },
7caf9830
S
1509 {
1510 # Video licensed under Creative Commons
1511 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1512 'info_dict': {
1513 'id': 'M4gD1WSo5mA',
1514 'ext': 'mp4',
1515 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1516 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1517 'duration': 721,
7caf9830
S
1518 'upload_date': '20150127',
1519 'uploader_id': 'BerkmanCenter',
ec85ded8 1520 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1521 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1522 'license': 'Creative Commons Attribution license (reuse allowed)',
1523 },
1524 'params': {
1525 'skip_download': True,
1526 },
1527 },
fd050249
S
1528 {
1529 # Channel-like uploader_url
1530 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1531 'info_dict': {
1532 'id': 'eQcmzGIKrzg',
1533 'ext': 'mp4',
1534 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1535 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1536 'duration': 4060,
fd050249 1537 'upload_date': '20151119',
eb6793ba 1538 'uploader': 'Bernie Sanders',
fd050249 1539 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1540 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1541 'license': 'Creative Commons Attribution license (reuse allowed)',
1542 },
1543 'params': {
1544 'skip_download': True,
1545 },
1546 },
040ac686
S
1547 {
1548 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1549 'only_matching': True,
7f29cf54
S
1550 },
1551 {
067aa17e 1552 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1553 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1554 'only_matching': True,
6496ccb4
S
1555 },
1556 {
1557 # Rental video preview
1558 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1559 'info_dict': {
1560 'id': 'uGpuVWrhIzE',
1561 'ext': 'mp4',
1562 'title': 'Piku - Trailer',
1563 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1564 'upload_date': '20150811',
1565 'uploader': 'FlixMatrix',
1566 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1567 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1568 'license': 'Standard YouTube License',
1569 },
1570 'params': {
1571 'skip_download': True,
1572 },
eb6793ba 1573 'skip': 'This video is not available.',
022a5d66 1574 },
12afdc2a
S
1575 {
1576 # YouTube Red video with episode data
1577 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1578 'info_dict': {
1579 'id': 'iqKdEhx-dD4',
1580 'ext': 'mp4',
1581 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1582 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1583 'duration': 2085,
12afdc2a
S
1584 'upload_date': '20170118',
1585 'uploader': 'Vsauce',
1586 'uploader_id': 'Vsauce',
1587 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1588 'series': 'Mind Field',
1589 'season_number': 1,
1590 'episode_number': 1,
1591 },
1592 'params': {
1593 'skip_download': True,
1594 },
1595 'expected_warnings': [
1596 'Skipping DASH manifest',
1597 ],
1598 },
c7121fa7
S
1599 {
1600 # The following content has been identified by the YouTube community
1601 # as inappropriate or offensive to some audiences.
1602 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1603 'info_dict': {
1604 'id': '6SJNVb0GnPI',
1605 'ext': 'mp4',
1606 'title': 'Race Differences in Intelligence',
1607 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1608 'duration': 965,
1609 'upload_date': '20140124',
1610 'uploader': 'New Century Foundation',
1611 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1612 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1613 },
1614 'params': {
1615 'skip_download': True,
1616 },
545cc85d 1617 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1618 },
022a5d66
S
1619 {
1620 # itag 212
1621 'url': '1t24XAntNCY',
1622 'only_matching': True,
fd5c4aab
S
1623 },
1624 {
1625 # geo restricted to JP
1626 'url': 'sJL6WA-aGkQ',
1627 'only_matching': True,
1628 },
cd5a74a2
S
1629 {
1630 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1631 'only_matching': True,
1632 },
bc2ca1bb 1633 {
1634 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1635 'only_matching': True,
1636 },
1637 {
1638 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1639 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1640 'only_matching': True,
1641 },
825cd268
RA
1642 {
1643 # DRM protected
1644 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1645 'only_matching': True,
4fe54c12
S
1646 },
1647 {
1648 # Video with unsupported adaptive stream type formats
1649 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1650 'info_dict': {
1651 'id': 'Z4Vy8R84T1U',
1652 'ext': 'mp4',
1653 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1654 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1655 'duration': 433,
1656 'upload_date': '20130923',
1657 'uploader': 'Amelia Putri Harwita',
1658 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1659 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1660 'formats': 'maxcount:10',
1661 },
1662 'params': {
1663 'skip_download': True,
1664 'youtube_include_dash_manifest': False,
1665 },
5429d6a9 1666 'skip': 'not actual anymore',
5caabd3c 1667 },
1668 {
822b9d9c 1669 # Youtube Music Auto-generated description
5caabd3c 1670 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1671 'info_dict': {
1672 'id': 'MgNrAu2pzNs',
1673 'ext': 'mp4',
1674 'title': 'Voyeur Girl',
1675 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1676 'upload_date': '20190312',
5429d6a9
S
1677 'uploader': 'Stephen - Topic',
1678 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1679 'artist': 'Stephen',
1680 'track': 'Voyeur Girl',
1681 'album': 'it\'s too much love to know my dear',
1682 'release_date': '20190313',
1683 'release_year': 2019,
1684 },
1685 'params': {
1686 'skip_download': True,
1687 },
1688 },
66b48727
RA
1689 {
1690 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1691 'only_matching': True,
1692 },
011e75e6
S
1693 {
1694 # invalid -> valid video id redirection
1695 'url': 'DJztXj2GPfl',
1696 'info_dict': {
1697 'id': 'DJztXj2GPfk',
1698 'ext': 'mp4',
1699 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1700 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1701 'upload_date': '20090125',
1702 'uploader': 'Prochorowka',
1703 'uploader_id': 'Prochorowka',
1704 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1705 'artist': 'Panjabi MC',
1706 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1707 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1708 },
1709 'params': {
1710 'skip_download': True,
1711 },
545cc85d 1712 'skip': 'Video unavailable',
ea74e00b
DP
1713 },
1714 {
1715 # empty description results in an empty string
1716 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1717 'info_dict': {
1718 'id': 'x41yOUIvK2k',
1719 'ext': 'mp4',
1720 'title': 'IMG 3456',
1721 'description': '',
1722 'upload_date': '20170613',
1723 'uploader_id': 'ElevageOrVert',
1724 'uploader': 'ElevageOrVert',
1725 },
1726 'params': {
1727 'skip_download': True,
1728 },
1729 },
a0566bbf 1730 {
29f7c58a 1731 # with '};' inside yt initial data (see [1])
1732 # see [2] for an example with '};' inside ytInitialPlayerResponse
1733 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1734 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1735 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1736 'info_dict': {
1737 'id': 'CHqg6qOn4no',
1738 'ext': 'mp4',
1739 'title': 'Part 77 Sort a list of simple types in c#',
1740 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1741 'upload_date': '20130831',
1742 'uploader_id': 'kudvenkat',
1743 'uploader': 'kudvenkat',
1744 },
1745 'params': {
1746 'skip_download': True,
1747 },
1748 },
29f7c58a 1749 {
1750 # another example of '};' in ytInitialData
1751 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1752 'only_matching': True,
1753 },
1754 {
1755 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1756 'only_matching': True,
1757 },
545cc85d 1758 {
cc2db878 1759 # https://github.com/ytdl-org/youtube-dl/pull/28094
1760 'url': 'OtqTfy26tG0',
1761 'info_dict': {
1762 'id': 'OtqTfy26tG0',
1763 'ext': 'mp4',
1764 'title': 'Burn Out',
1765 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1766 'upload_date': '20141120',
1767 'uploader': 'The Cinematic Orchestra - Topic',
1768 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1769 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1770 'artist': 'The Cinematic Orchestra',
1771 'track': 'Burn Out',
1772 'album': 'Every Day',
1773 'release_data': None,
1774 'release_year': None,
1775 },
1776 'params': {
1777 'skip_download': True,
1778 },
545cc85d 1779 },
bc2ca1bb 1780 {
1781 # controversial video, only works with bpctr when authenticated with cookies
1782 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1783 'only_matching': True,
1784 },
a1a7907b 1785 {
1786 # controversial video, requires bpctr/contentCheckOk
1787 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1788 'info_dict': {
1789 'id': 'SZJvDhaSDnc',
1790 'ext': 'mp4',
1791 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1792 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1793 'uploader': 'CBS This Morning',
11f9be09 1794 'uploader_id': 'CBSThisMorning',
a1a7907b 1795 'upload_date': '20140716',
1796 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1797 }
1798 },
f7ad7160 1799 {
1800 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1801 'url': 'cBvYw8_A0vQ',
1802 'info_dict': {
1803 'id': 'cBvYw8_A0vQ',
1804 'ext': 'mp4',
1805 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1806 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1807 'upload_date': '20201120',
1808 'uploader': 'Walk around Japan',
1809 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1810 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1811 },
1812 'params': {
1813 'skip_download': True,
1814 },
0fb983f6 1815 }, {
1816 # Has multiple audio streams
1817 'url': 'WaOKSUlf4TM',
1818 'only_matching': True
9297939e 1819 }, {
1820 # Requires Premium: has format 141 when requested using YTM url
1821 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1822 'only_matching': True
1823 }, {
120916da 1824 # multiple subtitles with same lang_code
1825 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1826 'only_matching': True,
109dd3b2 1827 }, {
1828 # Force use android client fallback
1829 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1830 'info_dict': {
1831 'id': 'YOelRv7fMxY',
11f9be09 1832 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1833 'ext': '3gp',
1834 'upload_date': '20210624',
1835 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1836 'uploader': 'colinfurze',
11f9be09 1837 'uploader_id': 'colinfurze',
109dd3b2 1838 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1839 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1840 },
1841 'params': {
1842 'format': '17', # 3gp format available on android
1843 'extractor_args': {'youtube': {'player_client': ['android']}},
1844 },
120916da 1845 },
109dd3b2 1846 {
1847 # Skip download of additional client configs (remix client config in this case)
1848 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1849 'only_matching': True,
1850 'params': {
1851 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1852 },
8fc54b12 1853 }, {
1854 # shorts
1855 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1856 'only_matching': True,
1857 },
2eb88d95
PH
1858 ]
1859
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs whose query string carries a non-empty ``list`` parameter are
    rejected outright (presumably so a playlist-aware extractor can take
    them - confirm against the other extractors); everything else is
    decided by the parent class' ``suitable``.
    """
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1868
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache:   player id -> downloaded player JS (see _load_player)
    # _player_cache: (player_url, signature shape) -> signature function
    #                (see _decrypt_signature)
    self._code_cache, self._player_cache = {}, {}
e0df6211 1873
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return an absolute player JS URL found in any of *ytcfgs*, or None.

    Looks for ``PLAYER_JS_URL`` first and then for a ``jsUrl`` under
    ``WEB_PLAYER_CONTEXT_CONFIGS``. Protocol-relative and site-relative
    values are made absolute against https://www.youtube.com.
    ``webpage`` is accepted but not used here.
    """
    js_url = traverse_obj(
        ytcfgs,
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if not js_url:
        return
    if js_url.startswith('//'):
        # protocol-relative URL
        return 'https:' + js_url
    if re.match(r'https?://', js_url):
        # already absolute
        return js_url
    return compat_urlparse.urljoin('https://www.youtube.com', js_url)
1886
b6de707d 1887 def _download_player_url(self, video_id, fatal=False):
1888 res = self._download_webpage(
1889 'https://www.youtube.com/iframe_api',
1890 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1891 if res:
1892 player_version = self._search_regex(
1893 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1894 if player_version:
1895 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1896
def _signature_cache_id(self, example_sig):
    """ Return a string describing the shape of a signature

    The signature is split on '.' and the length of every part is joined
    with '.' again.
    """
    part_lengths = [compat_str(len(segment)) for segment in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1900
e40c758c
S
1901 @classmethod
1902 def _extract_player_info(cls, player_url):
1903 for player_re in cls._PLAYER_INFO_RE:
1904 id_m = re.search(player_re, player_url)
1905 if id_m:
1906 break
1907 else:
c081b35c 1908 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1909 return id_m.group('id')
e40c758c 1910
109dd3b2 1911 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1912 player_id = self._extract_player_info(player_url)
1913 if player_id not in self._code_cache:
1914 self._code_cache[player_id] = self._download_webpage(
1915 player_url, video_id, fatal=fatal,
1916 note='Downloading player ' + player_id,
1917 errnote='Download of %s failed' % player_url)
1918 return player_id in self._code_cache
1919
e40c758c 1920 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1921 player_id = self._extract_player_info(player_url)
e0df6211 1922
c4417ddb 1923 # Read from filesystem cache
545cc85d 1924 func_id = 'js_%s_%s' % (
1925 player_id, self._signature_cache_id(example_sig))
c4417ddb 1926 assert os.path.basename(func_id) == func_id
a0e07d31 1927
69ea8ca4 1928 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1929 if cache_spec is not None:
78caa52a 1930 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1931
109dd3b2 1932 if self._load_player(video_id, player_url):
1933 code = self._code_cache[player_id]
1934 res = self._parse_sig_js(code)
e0df6211 1935
109dd3b2 1936 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1937 cache_res = res(test_string)
1938 cache_spec = [ord(c) for c in cache_res]
83799698 1939
109dd3b2 1940 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1941 return res
83799698 1942
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function *func*.

    *func* is probed with a string whose i-th character has code point i;
    the output is turned into a ' + '-joined expression of slices and
    single indices of ``s`` and shown via ``self.to_screen``.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            first = '' if start == 0 else str(start)
            stop = end + step
            last = (':%d' % stop) if stop >= 0 else ':'
            stride = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (first, last, stride)

        run_step = None
        # Squelch pyflakes warnings - run_start is always set together with run_step
        run_start = '(Never used)'
        for cur, prev in zip(idxs[1:], idxs[:-1]):
            delta = cur - prev
            if run_step is not None:
                # Inside a run: keep going while the stride is unchanged,
                # otherwise emit the run collected so far
                if delta != run_step:
                    yield _genslice(run_start, prev, run_step)
                    run_step = None
                continue
            if delta in [-1, 1]:
                run_step, run_start = delta, prev
            else:
                yield 's[%d]' % prev
        if run_step is None:
            yield 's[%d]' % cur
        else:
            yield _genslice(run_start, cur, run_step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % (part_lengths)
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1981
def _parse_sig_js(self, jscode):
    """Build a Python callable for the signature function found in *jscode*.

    The function name is located with the first matching pattern below
    and the function is then extracted with JSInterpreter. The returned
    callable takes the signature string as its only argument.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        # the extracted function takes its arguments as a list
        return initial_function([s])

    return run_signature_function
2005
545cc85d 2006 def _decrypt_signature(self, s, video_id, player_url):
257a2501 2007 """Turn the encrypted s field into a working signature"""
6b37f0be 2008
c8bf86d5 2009 if player_url is None:
69ea8ca4 2010 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 2011
c8bf86d5 2012 try:
62af3a0e 2013 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
2014 if player_id not in self._player_cache:
2015 func = self._extract_signature_function(
60064c53 2016 video_id, player_url, s
c8bf86d5
PH
2017 )
2018 self._player_cache[player_id] = func
2019 func = self._player_cache[player_id]
a06916d9 2020 if self.get_param('youtube_print_sig_code'):
60064c53 2021 self._print_sig_code(func, s)
c8bf86d5
PH
2022 return func(s)
2023 except Exception as e:
2024 tb = traceback.format_exc()
2025 raise ExtractorError(
78caa52a 2026 'Signature extraction failed: ' + tb, cause=e)
e0df6211 2027
109dd3b2 2028 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2029 """
2030 Extract signatureTimestamp (sts)
2031 Required to tell API what sig/player version is in use.
2032 """
2033 sts = None
2034 if isinstance(ytcfg, dict):
2035 sts = int_or_none(ytcfg.get('STS'))
2036
2037 if not sts:
2038 # Attempt to extract from player
2039 if player_url is None:
2040 error_msg = 'Cannot extract signature timestamp without player_url.'
2041 if fatal:
2042 raise ExtractorError(error_msg)
2043 self.report_warning(error_msg)
2044 return
2045 if self._load_player(video_id, player_url, fatal=fatal):
2046 player_id = self._extract_player_info(player_url)
2047 code = self._code_cache[player_id]
2048 sts = int_or_none(self._search_regex(
2049 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2050 'JS player signature timestamp', group='sts', fatal=fatal))
2051 return sts
2052
def _mark_watched(self, video_id, player_responses):
    """Request the playback-stats URL so the video is marked as watched.

    The URL is taken from the first player response that offers
    ``playbackTracking.videostatsPlaybackUrl.baseUrl``. ``ver`` and a
    random ``cpn`` are set in its query before it is requested
    (non-fatally). Only a warning is emitted when no URL is available.
    """
    tracking_url = traverse_obj(
        player_responses,
        (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not tracking_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]
    cpn = ''.join(cpn_chars)

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(url_parts._replace(query=encoded_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2078
@staticmethod
def _extract_urls(webpage):
    """Return every YouTube embed reference found in the HTML *webpage*.

    Three sources are scanned, in this order: embedded-player URLs, lazyYT
    ``data-youtube-id`` attributes and the ``data-video_id`` attribute of
    the Wordpress "YouTube Video Importer" plugin. The first two are HTML
    unescaped; the last two yield the raw attribute value rather than a
    full URL.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    found = [unescapeHTML(hit.group('url')) for hit in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    found.extend(unescapeHTML(lazy_id) for lazy_id in lazy_ids)

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    # findall yields one tuple per match; the video id is its last group
    found.extend(groups[-1] for groups in re.findall(importer_re, webpage))

    return found
2110
@staticmethod
def _extract_url(webpage):
    """Return the first result of ``_extract_urls`` for *webpage*, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
2115
@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of *url* matched against ``cls._VALID_URL``.

    The pattern is applied in verbose mode. Raises ExtractorError when the
    URL does not match.
    """
    matched = re.match(cls._VALID_URL, url, re.VERBOSE)
    if matched is not None:
        return matched.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af 2122
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in *data*.

    Start times come from ``chapterRenderer.timeRangeStartMillis``
    (scaled down by 1000) and titles from
    ``chapterRenderer.title.simpleText``; the rest is delegated to
    ``_extract_chapters``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_of(chapter):
        start_ms = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_ms, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_of,
        chapter_title=title_of,
        duration=duration)
2137
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists in the engagement panels.

    Each ``macroMarkersListRenderer`` contents list in *data* is tried in
    order; the first one that produces a non-empty chapter list wins.
    Returns an empty list when none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2153
2154 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2155 chapters = []
7c365c21 2156 last_chapter = {'start_time': 0}
2157 for idx, chapter in enumerate(chapter_list or []):
2158 title = chapter_title(chapter)
84213ea8
S
2159 start_time = chapter_time(chapter)
2160 if start_time is None:
2161 continue
7c365c21 2162 last_chapter['end_time'] = start_time
2163 if start_time < last_chapter['start_time']:
2164 if idx == 1:
2165 chapters.pop()
2166 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2167 else:
2168 self.report_warning(f'Invalid start time for chapter "{title}"')
2169 continue
2170 last_chapter = {'start_time': start_time, 'title': title}
2171 chapters.append(last_chapter)
2172 last_chapter['end_time'] = duration
84213ea8
S
2173 return chapters
2174
545cc85d 2175 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2176 return self._parse_json(self._search_regex(
2177 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2178 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 2179
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns whatever datetime_from_str produces for 'now-<X><units>', or
    None when the text has fewer than three space-separated parts or
    cannot be parsed.
    """
    parts = time_text.split(' ')
    if len(parts) < 3:
        return None
    amount, unit = parts[0], parts[1]
    try:
        return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
    except ValueError:
        return None
d92f5d5a 2192
a1c5d2ca
M
2193 def _extract_comment(self, comment_renderer, parent=None):
2194 comment_id = comment_renderer.get('commentId')
2195 if not comment_id:
2196 return
fe93e2c4 2197
052e1350 2198 text = self._get_text(comment_renderer, 'contentText')
fe93e2c4 2199
49bd8c66 2200 # note: timestamp is an estimate calculated from the current time and time_text
052e1350 2201 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
fe93e2c4 2202 time_text_dt = self.parse_time_text(time_text)
2203 if isinstance(time_text_dt, datetime.datetime):
2204 timestamp = calendar.timegm(time_text_dt.timetuple())
052e1350 2205 author = self._get_text(comment_renderer, 'authorText')
a1c5d2ca
M
2206 author_id = try_get(comment_renderer,
2207 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
fe93e2c4 2208
49bd8c66 2209 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2210 lambda x: x['likeCount']), compat_str)) or 0
a1c5d2ca
M
2211 author_thumbnail = try_get(comment_renderer,
2212 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2213
2214 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
97524332 2215 is_favorited = 'creatorHeart' in (try_get(
2216 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
a1c5d2ca
M
2217 return {
2218 'id': comment_id,
2219 'text': text,
d92f5d5a 2220 'timestamp': timestamp,
a1c5d2ca
M
2221 'time_text': time_text,
2222 'like_count': votes,
97524332 2223 'is_favorited': is_favorited,
a1c5d2ca
M
2224 'author': author,
2225 'author_id': author_id,
2226 'author_thumbnail': author_thumbnail,
2227 'author_is_uploader': author_is_uploader,
2228 'parent': parent or 'root'
2229 }
2230
99e9e001 2231 def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
2d6659b9 2232
2233 def extract_header(contents):
2234 _total_comments = 0
2235 _continuation = None
2236 for content in contents:
2237 comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
fe93e2c4 2238 expected_comment_count = parse_count(self._get_text(
052e1350 2239 comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
fe93e2c4 2240
2d6659b9 2241 if expected_comment_count:
fe93e2c4 2242 comment_counts[1] = expected_comment_count
2243 self.to_screen('Downloading ~%d comments' % expected_comment_count)
2d6659b9 2244 _total_comments = comment_counts[1]
2245 sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
2246 comment_sort_index = int(sort_mode_str != 'top') # 1 = new, 0 = top
2247
2248 sort_menu_item = try_get(
2249 comments_header_renderer,
2250 lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2251 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2252
2253 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2254 if not _continuation:
2255 continue
2256
2257 sort_text = sort_menu_item.get('title')
2258 if isinstance(sort_text, compat_str):
2259 sort_text = sort_text.lower()
2260 else:
2261 sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2262 self.to_screen('Sorting comments by %s' % sort_text)
2263 break
2264 return _total_comments, _continuation
a1c5d2ca 2265
2d6659b9 2266 def extract_thread(contents):
a1c5d2ca
M
2267 if not parent:
2268 comment_counts[2] = 0
2269 for content in contents:
2270 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2271 comment_renderer = try_get(
2272 comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
2273 content, (lambda x: x['commentRenderer'], dict))
2274
2275 if not comment_renderer:
2276 continue
2277 comment = self._extract_comment(comment_renderer, parent)
2278 if not comment:
2279 continue
2280 comment_counts[0] += 1
2281 yield comment
2282 # Attempt to get the replies
2283 comment_replies_renderer = try_get(
2284 comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2285
2286 if comment_replies_renderer:
2287 comment_counts[2] += 1
2288 comment_entries_iter = self._comment_entries(
99e9e001 2289 comment_replies_renderer, ytcfg, video_id,
2290 parent=comment.get('id'), comment_counts=comment_counts)
a1c5d2ca
M
2291
2292 for reply_comment in comment_entries_iter:
2293 yield reply_comment
2294
2d6659b9 2295 # YouTube comments have a max depth of 2
2296 max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
2297 if max_depth == 1 and parent:
2298 return
a1c5d2ca
M
2299 if not comment_counts:
2300 # comment so far, est. total comments, current comment thread #
2301 comment_counts = [0, 0, 0]
a1c5d2ca 2302
2d6659b9 2303 continuation = self._extract_continuation(root_continuation_data)
fe93e2c4 2304 if continuation and len(continuation['continuation']) < 27:
2d6659b9 2305 self.write_debug('Detected old API continuation token. Generating new API compatible token.')
2306 continuation_token = self._generate_comment_continuation(video_id)
fe93e2c4 2307 continuation = self._build_api_continuation_query(continuation_token, None)
2d6659b9 2308
2309 visitor_data = None
2310 is_first_continuation = parent is None
a1c5d2ca
M
2311
2312 for page_num in itertools.count(0):
2313 if not continuation:
2314 break
99e9e001 2315 headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
2d6659b9 2316 comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
2317 if page_num == 0:
2318 if is_first_continuation:
2319 note_prefix = 'Downloading comment section API JSON'
a1c5d2ca 2320 else:
2d6659b9 2321 note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
2322 comment_counts[2], comment_prog_str)
2323 else:
2324 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2325 ' ' if parent else '', ' replies' if parent else '',
2326 page_num, comment_prog_str)
2327
2328 response = self._extract_response(
fe93e2c4 2329 item_id=None, query=continuation,
2d6659b9 2330 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2331 check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
a1c5d2ca
M
2332 if not response:
2333 break
f4f751af 2334 visitor_data = try_get(
2335 response,
2336 lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
2337 compat_str) or visitor_data
a1c5d2ca 2338
2d6659b9 2339 continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))
a1c5d2ca 2340
2d6659b9 2341 continuation = None
2342 if isinstance(continuation_contents, list):
2343 for continuation_section in continuation_contents:
2344 if not isinstance(continuation_section, dict):
2345 continue
2346 continuation_items = try_get(
2347 continuation_section,
2348 (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
2349 lambda x: x['appendContinuationItemsAction']['continuationItems']),
2350 list) or []
2351 if is_first_continuation:
2352 total_comments, continuation = extract_header(continuation_items)
2353 if total_comments:
2354 yield total_comments
2355 is_first_continuation = False
2356 if continuation:
2357 break
2358 continue
2359 count = 0
2360 for count, entry in enumerate(extract_thread(continuation_items)):
2361 yield entry
2362 continuation = self._extract_continuation({'contents': continuation_items})
2363 if continuation:
2364 # Sometimes YouTube provides a continuation without any comments
2365 # In most cases we end up just downloading these with very little comments to come.
2366 if count == 0:
2367 if not parent:
2368 self.report_warning('No comments received - assuming end of comments')
2369 continuation = None
a1c5d2ca
M
2370 break
2371
2d6659b9 2372 # Deprecated response structure
2373 elif isinstance(continuation_contents, dict):
2374 known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
2375 for key, continuation_renderer in continuation_contents.items():
2376 if key not in known_continuation_renderers:
2377 continue
2378 if not isinstance(continuation_renderer, dict):
2379 continue
2380 if is_first_continuation:
2381 header_continuation_items = [continuation_renderer.get('header') or {}]
2382 total_comments, continuation = extract_header(header_continuation_items)
2383 if total_comments:
2384 yield total_comments
2385 is_first_continuation = False
2386 if continuation:
2387 break
a1c5d2ca 2388
2d6659b9 2389 # Sometimes YouTube provides a continuation without any comments
2390 # In most cases we end up just downloading these with very little comments to come.
2391 count = 0
2392 for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
2393 yield entry
2394 continuation = self._extract_continuation(continuation_renderer)
2395 if count == 0:
2396 if not parent:
2397 self.report_warning('No comments received - assuming end of comments')
2398 continuation = None
2399 break
a1c5d2ca 2400
2d6659b9 2401 @staticmethod
2402 def _generate_comment_continuation(video_id):
2403 """
2404 Generates initial comment section continuation token from given video id
2405 """
2406 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2407 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2408 new_continuation_intlist = list(itertools.chain.from_iterable(
2409 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2410 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2411
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Consumes the entries produced by _comment_entries until they run out,
    the 'max_comments' extractor argument is reached or the user interrupts.
    Returns a dict with the keys 'comments' and 'comment_count'
    """
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    # Force English regardless of account setting to prevent parsing issues
    # See: https://github.com/yt-dlp/yt-dlp/issues/532
    ytcfg = copy.deepcopy(ytcfg)
    client_context = traverse_obj(
        ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})
    client_context['hl'] = 'en'

    def comment_stream():
        section = traverse_obj(contents, (..., 'itemSectionRenderer'), get_all=False)
        yield from self._comment_entries(section, ytcfg, video_id)

    collected = []
    expected_total = 0
    try:
        for item in comment_stream():
            if len(collected) >= limit:
                break
            if isinstance(item, int):
                # An int is the estimated number of comments, not a comment
                expected_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(collected), expected_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
2441
109dd3b2 2442 @staticmethod
99e9e001 2443 def _get_checkok_params():
2444 return {'contentCheckOk': True, 'racyCheckOk': True}
2445
2446 @classmethod
2447 def _generate_player_context(cls, sts=None):
109dd3b2 2448 context = {
2449 'html5Preference': 'HTML5_PREF_WANTS',
2450 }
2451 if sts is not None:
2452 context['signatureTimestamp'] = sts
2453 return {
2454 'playbackContext': {
2455 'contentPlaybackContext': context
a1a7907b 2456 },
99e9e001 2457 **cls._get_checkok_params()
109dd3b2 2458 }
2459
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """
    Checks whether a player response indicates an age-gated video

    @param player_response  Player response dict (may be None)
    @returns                True if the playability status has a legacy age
                            gate reason, or if its status/reason contains
                            one of the known age gate markers
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lowercase, but statuses are reported in uppercase
    # (eg: AGE_VERIFICATION_REQUIRED), so the comparison must ignore case.
    # Non-string values cannot be searched and are skipped
    normalized = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in normalized)
2471
@staticmethod
def _is_unplayable(player_response):
    """Returns True if the playability status of the player response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 2475
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """
    Downloads the player API JSON for the given client

    @param client        Name of the client; also passed as the default client
    @param video_id      Video id to query
    @param master_ytcfg  ytcfg used as fallback for session/account lookups
                         and for the signature timestamp
    @param player_ytcfg  ytcfg used for the request and its headers
    @param player_url    Player URL; the signature timestamp is only
                         looked up when this is truthy
    @param initial_pr    Initial player response, used for the account syncid
    @returns             The API response, or None if it is falsy
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    query = {'videoId': video_id, **self._generate_player_context(sts)}
    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client, note=note)
    return response or None
2492
def _get_requested_clients(self, url, smuggled_data):
    """
    Determines which player clients should be queried

    The clients come from the 'player_client' extractor argument. 'all'
    expands to every allowed client (highest priority first); unknown names
    are skipped with a warning. Clients whose name starts with '_' cannot be
    requested. If nothing valid is requested, 'android' and 'web' are used.
    For music URLs, the existing '<client>_music' variants are added.

    @param url            URL being extracted
    @param smuggled_data  Smuggled data dict; 'is_music_url' marks a music URL
    @returns              The requested clients, passed through orderedSet
    """
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = ['android', 'web']

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the variants before extending: feeding extend() a generator
        # over the same list would mutate the list while it is being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 2513
c0bc527b
M
2514 def _extract_player_ytcfg(self, client, video_id):
2515 url = {
2516 'web_music': 'https://music.youtube.com',
2517 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2518 }.get(client)
2519 if not url:
2520 return {}
2521 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2522 return self.extract_ytcfg(video_id, webpage) or {}
2523
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """
    Collects the player responses of all the given clients

    Extra '_agegate'/'_creator' clients are tried on the fly when a response
    turns out to be age-gated. If a client fails with an ExtractorError, the
    error is reported as a warning unless no response at all was collected,
    in which case the last error is raised.

    @param clients       List of client names, in order of preference
    @param video_id      Video id
    @param webpage       Watch page (may be None)
    @param master_ytcfg  ytcfg of the watch page or the default one
    @returns             Tuple of (list of player responses, player URL)
    """
    initial_pr = self._extract_yt_initial_variable(
        webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
        video_id, 'initial player response') if webpage else None

    requested = clients
    # Used as a stack: the next client to try is popped from the end
    pending = clients[::-1]
    responses = []

    def queue_client(name):
        # Unknown clients and those requested by the user are never queued
        if name not in INNERTUBE_CLIENTS or name in requested:
            return
        pending.append(name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        responses.append({**initial_pr, 'streamingData': None})

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client = pending.pop()
        is_web = client == 'web'

        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not (player_url or tried_iframe_fallback):
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        if is_web and initial_pr:
            # The watch page already contains the response of the web client
            pr = initial_pr
        else:
            try:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
            except ExtractorError as e:
                # Only the most recent error is kept; older ones become warnings
                if last_error:
                    self.report_warning(last_error)
                last_error = e
                continue

        if pr:
            responses.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            queue_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            queue_client(f'{client}_agegate')

    if last_error:
        if not responses:
            raise last_error
        self.report_warning(last_error)
    return responses, player_url
11f9be09 2590
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """
    Yields the format dicts found in the given streaming data

    Formats listed directly in the streaming data are yielded first, followed
    by those of the HLS and DASH manifests that were not already seen.

    @param streaming_data  List of 'streamingData' dicts of the player responses
    @param video_id        Video id
    @param player_url      Player URL used for decrypting signatures; formats
                           with a signature cipher are skipped if it is falsy
    @param is_live         Whether the video is live. DASH manifests of live
                           videos are only used if the 'include_live_dash'
                           extractor argument is given
    """
    itags, stream_ids = [], []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The key may be present with a null value, so a .get() default is not enough
        audio_track_id = audio_track.get('id') or ''
        stream_id = '%s.%s' % (itag or '', audio_track_id)
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': ', '.join(filter(None, (
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # quality is None if the format has neither quality nor audioQuality
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', '')))),
            'fps': int_or_none(fmt.get('fps')),
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track_id.split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = (
        (not is_live or self._configuration_arg('include_live_dash'))
        and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    def guess_quality(f):
        # Manifest formats carry no quality name, so reuse the one seen for
        # the same itag or, failing that, the same height
        for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
            if val in qdict:
                return q(qdict[val])
        return -1

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag in itags:
                    continue
                if itag:
                    f['format_id'] = itag
                    itags.append(itag)
                f['quality'] = guess_quality(f)
                yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag:
                    itags.append(itag)
                f['quality'] = guess_quality(f)
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                yield f
2729
2730 def _real_extract(self, url):
2731 url, smuggled_data = unsmuggle_url(url, {})
2732 video_id = self._match_id(url)
2733
2734 base_url = self.http_scheme() + '//www.youtube.com/'
2735 webpage_url = base_url + 'watch?v=' + video_id
b6de707d 2736 webpage = None
2737 if 'webpage' not in self._configuration_arg('player_skip'):
2738 webpage = self._download_webpage(
2739 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
11f9be09 2740
2741 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
11f9be09 2742
b6de707d 2743 player_responses, player_url = self._extract_player_responses(
11f9be09 2744 self._get_requested_clients(url, smuggled_data),
99e9e001 2745 video_id, webpage, master_ytcfg)
11f9be09 2746
352d63fd 2747 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
11f9be09 2748
2749 playability_statuses = traverse_obj(
2750 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2751
2752 trailer_video_id = get_first(
2753 playability_statuses,
2754 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2755 expected_type=str)
2756 if trailer_video_id:
2757 return self.url_result(
2758 trailer_video_id, self.ie_key(), trailer_video_id)
2759
2760 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2761 if webpage else (lambda x: None))
2762
2763 video_details = traverse_obj(
2764 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2765 microformats = traverse_obj(
2766 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2767 expected_type=dict, default=[])
2768 video_title = (
2769 get_first(video_details, 'title')
2770 or self._get_text(microformats, (..., 'title'))
2771 or search_meta(['og:title', 'twitter:title', 'title']))
2772 video_description = get_first(video_details, 'shortDescription')
2773
2774 if not smuggled_data.get('force_singlefeed', False):
2775 if not self.get_param('noplaylist'):
2776 multifeed_metadata_list = get_first(
2777 player_responses,
2778 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2779 expected_type=str)
2780 if multifeed_metadata_list:
2781 entries = []
2782 feed_ids = []
2783 for feed in multifeed_metadata_list.split(','):
2784 # Unquote should take place before split on comma (,) since textual
2785 # fields may contain comma as well (see
2786 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2787 feed_data = compat_parse_qs(
2788 compat_urllib_parse_unquote_plus(feed))
2789
2790 def feed_entry(name):
2791 return try_get(
2792 feed_data, lambda x: x[name][0], compat_str)
2793
2794 feed_id = feed_entry('id')
2795 if not feed_id:
2796 continue
2797 feed_title = feed_entry('title')
2798 title = video_title
2799 if feed_title:
2800 title += ' (%s)' % feed_title
2801 entries.append({
2802 '_type': 'url_transparent',
2803 'ie_key': 'Youtube',
2804 'url': smuggle_url(
2805 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2806 {'force_singlefeed': True}),
2807 'title': title,
2808 })
2809 feed_ids.append(feed_id)
2810 self.to_screen(
2811 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2812 % (', '.join(feed_ids), video_id))
2813 return self.playlist_result(
2814 entries, video_id, video_title, video_description)
2815 else:
2816 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2817
7ea65411 2818 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
11f9be09 2819 is_live = get_first(video_details, 'isLive')
7ea65411 2820 if is_live is None:
2821 is_live = get_first(live_broadcast_details, 'isLiveNow')
11f9be09 2822
2823 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2824 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
bf1317d2 2825
545cc85d 2826 if not formats:
11f9be09 2827 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 2828 self.report_drm(video_id)
11f9be09 2829 pemr = get_first(
2830 playability_statuses,
2831 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2832 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2833 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 2834 if subreason:
545cc85d 2835 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 2836 countries = get_first(microformats, 'availableCountries')
545cc85d 2837 if not countries:
2838 regions_allowed = search_meta('regionsAllowed')
2839 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2840 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 2841 reason += f'. {subreason}'
545cc85d 2842 if reason:
b7da73eb 2843 self.raise_no_formats(reason, expected=True)
bf1317d2 2844
11f9be09 2845 for f in formats:
2a9c6dcd 2846 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
11f9be09 2847 f['source_preference'] = -10
3619f78d 2848 # TODO: this method is not reliable
2849 f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
11f9be09 2850
2a9c6dcd 2851 # Source is given priority since formats that throttle are given lower source_preference
2852 # When throttling issue is fully fixed, remove this
c311988d 2853 self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang'))
bf1317d2 2854
11f9be09 2855 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 2856 if not keywords and webpage:
2857 keywords = [
2858 unescapeHTML(m.group('content'))
2859 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2860 for keyword in keywords:
2861 if keyword.startswith('yt:stretch='):
201c1459 2862 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2863 if mobj:
2864 # NB: float is intentional for forcing float division
2865 w, h = (float(v) for v in mobj.groups())
2866 if w > 0 and h > 0:
2867 ratio = w / h
2868 for f in formats:
2869 if f.get('vcodec') != 'none':
2870 f['stretched_ratio'] = ratio
2871 break
6449cd80 2872
545cc85d 2873 thumbnails = []
11f9be09 2874 thumbnail_dicts = traverse_obj(
2875 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2876 expected_type=dict, default=[])
2877 for thumbnail in thumbnail_dicts:
2878 thumbnail_url = thumbnail.get('url')
2879 if not thumbnail_url:
2880 continue
2881 # Sometimes youtube gives a wrong thumbnail URL. See:
2882 # https://github.com/yt-dlp/yt-dlp/issues/233
2883 # https://github.com/ytdl-org/youtube-dl/issues/28023
2884 if 'maxresdefault' in thumbnail_url:
2885 thumbnail_url = thumbnail_url.split('?')[0]
2886 thumbnails.append({
2887 'url': thumbnail_url,
2888 'height': int_or_none(thumbnail.get('height')),
2889 'width': int_or_none(thumbnail.get('width')),
2890 })
ff2751ac 2891 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2892 if thumbnail_url:
2893 thumbnails.append({
2894 'url': thumbnail_url,
ff2751ac 2895 })
0ba692ac 2896 # The best resolution thumbnails sometimes does not appear in the webpage
2897 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 2898 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2899 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
245524e6 2900 # TODO: Test them also? - For some videos, even these don't exist
cca80fe6 2901 guaranteed_thumbnail_names = [
2902 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2903 'mqdefault', 'mq1', 'mq2', 'mq3',
2904 'default', '1', '2', '3'
2905 ]
2906 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2907 n_thumbnail_names = len(thumbnail_names)
2908
0ba692ac 2909 thumbnails.extend({
2910 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2911 video_id=video_id, name=name, ext=ext,
2912 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 2913 '_test_url': name in hq_thumbnail_names,
2914 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 2915 for thumb in thumbnails:
cca80fe6 2916 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 2917 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 2918 self._remove_duplicate_formats(thumbnails)
545cc85d 2919
7ea65411 2920 category = get_first(microformats, 'category') or search_meta('genre')
2921 channel_id = str_or_none(
2922 get_first(video_details, 'channelId')
2923 or get_first(microformats, 'externalChannelId')
2924 or search_meta('channelId'))
2925 duration = int_or_none(
2926 get_first(video_details, 'lengthSeconds')
2927 or get_first(microformats, 'lengthSeconds')
2928 or parse_duration(search_meta('duration'))) or None
2929 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2930
2931 live_content = get_first(video_details, 'isLiveContent')
2932 is_upcoming = get_first(video_details, 'isUpcoming')
2933 if is_live is None:
2934 if is_upcoming or live_content is False:
2935 is_live = False
2936 if is_upcoming is None and (live_content or is_live):
2937 is_upcoming = False
2938 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2939 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2940 if not duration and live_endtime and live_starttime:
2941 duration = live_endtime - live_starttime
2942
545cc85d 2943 info = {
2944 'id': video_id,
2945 'title': self._live_title(video_title) if is_live else video_title,
2946 'formats': formats,
2947 'thumbnails': thumbnails,
2948 'description': video_description,
2949 'upload_date': unified_strdate(
11f9be09 2950 get_first(microformats, 'uploadDate')
545cc85d 2951 or search_meta('uploadDate')),
11f9be09 2952 'uploader': get_first(video_details, 'author'),
545cc85d 2953 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2954 'uploader_url': owner_profile_url,
2955 'channel_id': channel_id,
11f9be09 2956 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
545cc85d 2957 'duration': duration,
2958 'view_count': int_or_none(
11f9be09 2959 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 2960 or search_meta('interactionCount')),
11f9be09 2961 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 2962 'age_limit': 18 if (
11f9be09 2963 get_first(microformats, 'isFamilySafe') is False
545cc85d 2964 or search_meta('isFamilyFriendly') == 'false'
2965 or search_meta('og:restrictions:age') == '18+') else 0,
2966 'webpage_url': webpage_url,
2967 'categories': [category] if category else None,
2968 'tags': keywords,
11f9be09 2969 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 2970 'is_live': is_live,
2971 'was_live': (False if is_live or is_upcoming or live_content is False
2972 else None if is_live is None or is_upcoming is None
2973 else live_content),
2974 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2975 'release_timestamp': live_starttime,
545cc85d 2976 }
b477fc13 2977
3944e7af 2978 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2979 # Converted into dicts to remove duplicates
2980 captions = {
2981 sub.get('baseUrl'): sub
2982 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2983 translation_languages = {
2984 lang.get('languageCode'): lang.get('languageName')
2985 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
545cc85d 2986 subtitles = {}
2987 if pctr:
774d79cc 2988 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2989 lang_subs = container.setdefault(lang_code, [])
545cc85d 2990 for fmt in self._SUBTITLE_FORMATS:
2991 query.update({
2992 'fmt': fmt,
2993 })
2994 lang_subs.append({
2995 'ext': fmt,
2996 'url': update_url_query(base_url, query),
774d79cc 2997 'name': sub_name,
545cc85d 2998 })
7e72694b 2999
3944e7af 3000 for base_url, caption_track in captions.items():
545cc85d 3001 if not base_url:
3002 continue
3003 if caption_track.get('kind') != 'asr':
120916da 3004 lang_code = (
3005 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
3006 or caption_track.get('languageCode'))
545cc85d 3007 if not lang_code:
3008 continue
3009 process_language(
774d79cc 3010 subtitles, base_url, lang_code,
a7429aa9 3011 traverse_obj(caption_track, ('name', 'simpleText'), ('name', 'runs', ..., 'text'), get_all=False),
774d79cc 3012 {})
545cc85d 3013 continue
3014 automatic_captions = {}
3944e7af 3015 for trans_code, trans_name in translation_languages.items():
3016 if not trans_code:
545cc85d 3017 continue
3018 process_language(
3944e7af 3019 automatic_captions, base_url, trans_code,
3020 self._get_text(trans_name, max_runs=1),
3021 {'tlang': trans_code})
545cc85d 3022 info['automatic_captions'] = automatic_captions
3023 info['subtitles'] = subtitles
7e72694b 3024
545cc85d 3025 parsed_url = compat_urllib_parse_urlparse(url)
3026 for component in [parsed_url.fragment, parsed_url.query]:
3027 query = compat_parse_qs(component)
3028 for k, v in query.items():
3029 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3030 d_k += '_time'
3031 if d_k not in info and k in s_ks:
3032 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
3033
3034 # Youtube Music Auto-generated description
822b9d9c 3035 if video_description:
38d70284 3036 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 3037 if mobj:
822b9d9c
RA
3038 release_year = mobj.group('release_year')
3039 release_date = mobj.group('release_date')
3040 if release_date:
3041 release_date = release_date.replace('-', '')
3042 if not release_year:
545cc85d 3043 release_year = release_date[:4]
3044 info.update({
3045 'album': mobj.group('album'.strip()),
3046 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3047 'track': mobj.group('track').strip(),
3048 'release_date': release_date,
cc2db878 3049 'release_year': int_or_none(release_year),
545cc85d 3050 })
7e72694b 3051
545cc85d 3052 initial_data = None
3053 if webpage:
3054 initial_data = self._extract_yt_initial_variable(
3055 webpage, self._YT_INITIAL_DATA_RE, video_id,
3056 'yt initial data')
3057 if not initial_data:
99e9e001 3058 query = {'videoId': video_id}
3059 query.update(self._get_checkok_params())
109dd3b2 3060 initial_data = self._extract_response(
3061 item_id=video_id, ep='next', fatal=False,
99e9e001 3062 ytcfg=master_ytcfg, query=query,
3063 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 3064 note='Downloading initial data API JSON')
545cc85d 3065
c60ee3a2 3066 try:
3067 # This will error if there is no livechat
3068 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3069 info['subtitles']['live_chat'] = [{
3070 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3071 'video_id': video_id,
3072 'ext': 'json',
f6745c49 3073 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 3074 }]
3075 except (KeyError, IndexError, TypeError):
3076 pass
545cc85d 3077
3078 if initial_data:
7c365c21 3079 info['chapters'] = (
3080 self._extract_chapters_from_json(initial_data, duration)
3081 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3082 or None)
545cc85d 3083
3084 contents = try_get(
3085 initial_data,
3086 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3087 list) or []
3088 for content in contents:
3089 vpir = content.get('videoPrimaryInfoRenderer')
3090 if vpir:
3091 stl = vpir.get('superTitleLink')
3092 if stl:
fe93e2c4 3093 stl = self._get_text(stl)
545cc85d 3094 if try_get(
3095 vpir,
3096 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3097 info['location'] = stl
3098 else:
3099 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3100 if mobj:
3101 info.update({
3102 'series': mobj.group(1),
3103 'season_number': int(mobj.group(2)),
3104 'episode_number': int(mobj.group(3)),
3105 })
3106 for tlb in (try_get(
3107 vpir,
3108 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3109 list) or []):
3110 tbr = tlb.get('toggleButtonRenderer') or {}
3111 for getter, regex in [(
3112 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3113 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3114 lambda x: x['accessibility'],
3115 lambda x: x['accessibilityData']['accessibilityData'],
3116 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3117 label = (try_get(tbr, getter, dict) or {}).get('label')
3118 if label:
3119 mobj = re.match(regex, label)
3120 if mobj:
3121 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3122 break
3123 sbr_tooltip = try_get(
3124 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3125 if sbr_tooltip:
3126 like_count, dislike_count = sbr_tooltip.split(' / ')
3127 info.update({
3128 'like_count': str_to_int(like_count),
3129 'dislike_count': str_to_int(dislike_count),
3130 })
3131 vsir = content.get('videoSecondaryInfoRenderer')
3132 if vsir:
052e1350 3133 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
545cc85d 3134 rows = try_get(
3135 vsir,
3136 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3137 list) or []
3138 multiple_songs = False
3139 for row in rows:
3140 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3141 multiple_songs = True
3142 break
3143 for row in rows:
3144 mrr = row.get('metadataRowRenderer') or {}
3145 mrr_title = mrr.get('title')
3146 if not mrr_title:
3147 continue
052e1350 3148 mrr_title = self._get_text(mrr, 'title')
3149 mrr_contents_text = self._get_text(mrr, ('contents', 0))
545cc85d 3150 if mrr_title == 'License':
3151 info['license'] = mrr_contents_text
3152 elif not multiple_songs:
3153 if mrr_title == 'Album':
3154 info['album'] = mrr_contents_text
3155 elif mrr_title == 'Artist':
3156 info['artist'] = mrr_contents_text
3157 elif mrr_title == 'Song':
3158 info['track'] = mrr_contents_text
3159
3160 fallbacks = {
3161 'channel': 'uploader',
3162 'channel_id': 'uploader_id',
3163 'channel_url': 'uploader_url',
3164 }
3165 for to, frm in fallbacks.items():
3166 if not info.get(to):
3167 info[to] = info.get(frm)
3168
3169 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3170 v = info.get(s_k)
3171 if v:
3172 info[d_k] = v
b84071c0 3173
11f9be09 3174 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3175 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3176 is_membersonly = None
b28f8d24 3177 is_premium = None
c224251a
M
3178 if initial_data and is_private is not None:
3179 is_membersonly = False
b28f8d24 3180 is_premium = False
47193e02 3181 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3182 badge_labels = set()
3183 for content in contents:
3184 if not isinstance(content, dict):
3185 continue
3186 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3187 for badge_label in badge_labels:
3188 if badge_label.lower() == 'members only':
3189 is_membersonly = True
3190 elif badge_label.lower() == 'premium':
3191 is_premium = True
3192 elif badge_label.lower() == 'unlisted':
3193 is_unlisted = True
c224251a 3194
c224251a
M
3195 info['availability'] = self._availability(
3196 is_private=is_private,
b28f8d24 3197 needs_premium=is_premium,
c224251a
M
3198 needs_subscription=is_membersonly,
3199 needs_auth=info['age_limit'] >= 18,
3200 is_unlisted=None if is_private is None else is_unlisted)
3201
0bb1bc1b 3202 if self.get_param('getcomments', False):
11f9be09 3203 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3204
11f9be09 3205 self.mark_watched(video_id, player_responses)
d77ab8e2 3206
545cc85d 3207 return info
c5e8d7af 3208
5f6a1245 3209
8bdd16b4 3210class YoutubeTabIE(YoutubeBaseInfoExtractor):
3211 IE_DESC = 'YouTube.com tab'
70d5c17b 3212 _VALID_URL = r'''(?x)
3213 https?://
3214 (?:\w+\.)?
3215 (?:
3216 youtube(?:kids)?\.com|
3217 invidio\.us
3218 )/
3219 (?:
fe03a6cd 3220 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3221 (?P<not_channel>
9ba5705a 3222 feed/|hashtag/|
70d5c17b 3223 (?:playlist|watch)\?.*?\blist=
3224 )|
29f7c58a 3225 (?!(?:%s)\b) # Direct URLs
70d5c17b 3226 )
3227 (?P<id>[^/?\#&]+)
3228 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3229 IE_NAME = 'youtube:tab'
3230
81127aa5 3231 _TESTS = [{
da692b79 3232 'note': 'playlists, multipage',
8bdd16b4 3233 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3234 'playlist_mincount': 94,
3235 'info_dict': {
3236 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3237 'title': 'Игорь Клейнер - Playlists',
3238 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3239 'uploader': 'Игорь Клейнер',
3240 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3241 },
3242 }, {
da692b79 3243 'note': 'playlists, multipage, different order',
8bdd16b4 3244 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3245 'playlist_mincount': 94,
3246 'info_dict': {
3247 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3248 'title': 'Игорь Клейнер - Playlists',
3249 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3250 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3251 'uploader': 'Игорь Клейнер',
8bdd16b4 3252 },
201c1459 3253 }, {
da692b79 3254 'note': 'playlists, series',
201c1459 3255 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3256 'playlist_mincount': 5,
3257 'info_dict': {
3258 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3259 'title': '3Blue1Brown - Playlists',
3260 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3261 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3262 'uploader': '3Blue1Brown',
201c1459 3263 },
8bdd16b4 3264 }, {
da692b79 3265 'note': 'playlists, singlepage',
8bdd16b4 3266 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3267 'playlist_mincount': 4,
3268 'info_dict': {
3269 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3270 'title': 'ThirstForScience - Playlists',
3271 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3272 'uploader': 'ThirstForScience',
3273 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3274 }
3275 }, {
3276 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3277 'only_matching': True,
3278 }, {
da692b79 3279 'note': 'basic, single video playlist',
0e30a7b9 3280 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3281 'info_dict': {
0e30a7b9 3282 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3283 'uploader': 'Sergey M.',
3284 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3285 'title': 'youtube-dl public playlist',
81127aa5 3286 },
0e30a7b9 3287 'playlist_count': 1,
9291475f 3288 }, {
da692b79 3289 'note': 'empty playlist',
0e30a7b9 3290 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3291 'info_dict': {
0e30a7b9 3292 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3293 'uploader': 'Sergey M.',
3294 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3295 'title': 'youtube-dl empty playlist',
9291475f
PH
3296 },
3297 'playlist_count': 0,
3298 }, {
da692b79 3299 'note': 'Home tab',
8bdd16b4 3300 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3301 'info_dict': {
8bdd16b4 3302 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3303 'title': 'lex will - Home',
3304 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3305 'uploader': 'lex will',
3306 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3307 },
8bdd16b4 3308 'playlist_mincount': 2,
9291475f 3309 }, {
da692b79 3310 'note': 'Videos tab',
8bdd16b4 3311 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3312 'info_dict': {
8bdd16b4 3313 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3314 'title': 'lex will - Videos',
3315 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3316 'uploader': 'lex will',
3317 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3318 },
8bdd16b4 3319 'playlist_mincount': 975,
9291475f 3320 }, {
da692b79 3321 'note': 'Videos tab, sorted by popular',
8bdd16b4 3322 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3323 'info_dict': {
8bdd16b4 3324 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3325 'title': 'lex will - Videos',
3326 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3327 'uploader': 'lex will',
3328 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3329 },
8bdd16b4 3330 'playlist_mincount': 199,
9291475f 3331 }, {
da692b79 3332 'note': 'Playlists tab',
8bdd16b4 3333 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3334 'info_dict': {
8bdd16b4 3335 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3336 'title': 'lex will - Playlists',
3337 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3338 'uploader': 'lex will',
3339 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3340 },
8bdd16b4 3341 'playlist_mincount': 17,
ac7553d0 3342 }, {
da692b79 3343 'note': 'Community tab',
8bdd16b4 3344 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3345 'info_dict': {
8bdd16b4 3346 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3347 'title': 'lex will - Community',
3348 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3349 'uploader': 'lex will',
3350 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3351 },
3352 'playlist_mincount': 18,
87dadd45 3353 }, {
da692b79 3354 'note': 'Channels tab',
8bdd16b4 3355 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3356 'info_dict': {
8bdd16b4 3357 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3358 'title': 'lex will - Channels',
3359 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3360 'uploader': 'lex will',
3361 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3362 },
deaec5af 3363 'playlist_mincount': 12,
cd684175 3364 }, {
3365 'note': 'Search tab',
3366 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3367 'playlist_mincount': 40,
3368 'info_dict': {
3369 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3370 'title': '3Blue1Brown - Search - linear algebra',
3371 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3372 'uploader': '3Blue1Brown',
3373 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3374 },
6b08cdf6 3375 }, {
a0566bbf 3376 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3377 'only_matching': True,
3378 }, {
a0566bbf 3379 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3380 'only_matching': True,
3381 }, {
a0566bbf 3382 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3383 'only_matching': True,
3384 }, {
3385 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3386 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3387 'info_dict': {
3388 'title': '29C3: Not my department',
3389 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3390 'uploader': 'Christiaan008',
3391 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3392 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3393 },
3394 'playlist_count': 96,
3395 }, {
3396 'note': 'Large playlist',
3397 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3398 'info_dict': {
8bdd16b4 3399 'title': 'Uploads from Cauchemar',
3400 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3401 'uploader': 'Cauchemar',
3402 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3403 },
8bdd16b4 3404 'playlist_mincount': 1123,
3405 }, {
da692b79 3406 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3407 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3408 'only_matching': True,
4b7df0d3
JMF
3409 }, {
3410 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3411 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3412 'info_dict': {
acf757f4
PH
3413 'title': 'Uploads from Interstellar Movie',
3414 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3415 'uploader': 'Interstellar Movie',
8bdd16b4 3416 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3417 },
481cc733 3418 'playlist_mincount': 21,
358de58c 3419 }, {
3420 'note': 'Playlist with "show unavailable videos" button',
3421 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3422 'info_dict': {
3423 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3424 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3425 'uploader': 'Phim Siêu Nhân Nhật Bản',
3426 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3427 },
da692b79 3428 'playlist_mincount': 200,
5d342002 3429 }, {
da692b79 3430 'note': 'Playlist with unavailable videos in page 7',
5d342002 3431 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3432 'info_dict': {
3433 'title': 'Uploads from BlankTV',
3434 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3435 'uploader': 'BlankTV',
3436 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3437 },
da692b79 3438 'playlist_mincount': 1000,
8bdd16b4 3439 }, {
da692b79 3440 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3441 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3442 'info_dict': {
3443 'title': 'Data Analysis with Dr Mike Pound',
3444 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3445 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3446 'uploader': 'Computerphile',
deaec5af 3447 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3448 },
3449 'playlist_mincount': 11,
3450 }, {
a0566bbf 3451 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3452 'only_matching': True,
dacb3a86 3453 }, {
da692b79 3454 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3455 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3456 'info_dict': {
3457 'id': 'FqZTN594JQw',
3458 'ext': 'webm',
3459 'title': "Smiley's People 01 detective, Adventure Series, Action",
3460 'uploader': 'STREEM',
3461 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3462 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3463 'upload_date': '20150526',
3464 'license': 'Standard YouTube License',
3465 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3466 'categories': ['People & Blogs'],
3467 'tags': list,
dbdaaa23 3468 'view_count': int,
dacb3a86
S
3469 'like_count': int,
3470 'dislike_count': int,
3471 },
3472 'params': {
3473 'skip_download': True,
3474 },
13a75688 3475 'skip': 'This video is not available.',
dacb3a86 3476 'add_ie': [YoutubeIE.ie_key()],
481cc733 3477 }, {
8bdd16b4 3478 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3479 'only_matching': True,
66b48727 3480 }, {
8bdd16b4 3481 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3482 'only_matching': True,
a0566bbf 3483 }, {
3484 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3485 'info_dict': {
57015a4a 3486 'id': '3yImotZU3tw', # This will keep changing
a0566bbf 3487 'ext': 'mp4',
deaec5af 3488 'title': compat_str,
a0566bbf 3489 'uploader': 'Sky News',
3490 'uploader_id': 'skynews',
3491 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3492 'upload_date': r're:\d{8}',
3493 'description': compat_str,
a0566bbf 3494 'categories': ['News & Politics'],
3495 'tags': list,
3496 'like_count': int,
3497 'dislike_count': int,
3498 },
3499 'params': {
3500 'skip_download': True,
3501 },
da692b79 3502 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3503 }, {
3504 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3505 'info_dict': {
3506 'id': 'a48o2S1cPoo',
3507 'ext': 'mp4',
3508 'title': 'The Young Turks - Live Main Show',
3509 'uploader': 'The Young Turks',
3510 'uploader_id': 'TheYoungTurks',
3511 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3512 'upload_date': '20150715',
3513 'license': 'Standard YouTube License',
3514 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3515 'categories': ['News & Politics'],
3516 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3517 'like_count': int,
3518 'dislike_count': int,
3519 },
3520 'params': {
3521 'skip_download': True,
3522 },
3523 'only_matching': True,
3524 }, {
3525 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3526 'only_matching': True,
3527 }, {
3528 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3529 'only_matching': True,
09f1580e 3530 }, {
3531 'note': 'A channel that is not live. Should raise error',
3532 'url': 'https://www.youtube.com/user/numberphile/live',
3533 'only_matching': True,
3d3dddc9 3534 }, {
3535 'url': 'https://www.youtube.com/feed/trending',
3536 'only_matching': True,
3537 }, {
3d3dddc9 3538 'url': 'https://www.youtube.com/feed/library',
3539 'only_matching': True,
3540 }, {
3d3dddc9 3541 'url': 'https://www.youtube.com/feed/history',
3542 'only_matching': True,
3543 }, {
3d3dddc9 3544 'url': 'https://www.youtube.com/feed/subscriptions',
3545 'only_matching': True,
3546 }, {
3d3dddc9 3547 'url': 'https://www.youtube.com/feed/watch_later',
3548 'only_matching': True,
3549 }, {
da692b79 3550 'note': 'Recommended - redirects to home page',
3d3dddc9 3551 'url': 'https://www.youtube.com/feed/recommended',
3552 'only_matching': True,
29f7c58a 3553 }, {
da692b79 3554 'note': 'inline playlist with not always working continuations',
29f7c58a 3555 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3556 'only_matching': True,
3557 }, {
3558 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3559 'only_matching': True,
3560 }, {
3561 'url': 'https://www.youtube.com/course',
3562 'only_matching': True,
3563 }, {
3564 'url': 'https://www.youtube.com/zsecurity',
3565 'only_matching': True,
3566 }, {
3567 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3568 'only_matching': True,
3569 }, {
3570 'url': 'https://www.youtube.com/TheYoungTurks/live',
3571 'only_matching': True,
39ed931e 3572 }, {
3573 'url': 'https://www.youtube.com/hashtag/cctv9',
3574 'info_dict': {
3575 'id': 'cctv9',
3576 'title': '#cctv9',
3577 },
3578 'playlist_mincount': 350,
201c1459 3579 }, {
3580 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3581 'only_matching': True,
9297939e 3582 }, {
da692b79 3583 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3584 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3585 'only_matching': True
fe03a6cd 3586 }, {
3587 'note': '/browse/ should redirect to /channel/',
3588 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3589 'only_matching': True
3590 }, {
3591 'note': 'VLPL, should redirect to playlist?list=PL...',
3592 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3593 'info_dict': {
3594 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3595 'uploader': 'NoCopyrightSounds',
3596 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3597 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3598 'title': 'NCS Releases',
3599 },
3600 'playlist_mincount': 166,
18db7548 3601 }, {
3602 'note': 'Topic, should redirect to playlist?list=UU...',
3603 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3604 'info_dict': {
3605 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3606 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3607 'title': 'Uploads from Royalty Free Music - Topic',
3608 'uploader': 'Royalty Free Music - Topic',
3609 },
3610 'expected_warnings': [
3611 'A channel/user page was given',
3612 'The URL does not have a videos tab',
3613 ],
3614 'playlist_mincount': 101,
3615 }, {
3616 'note': 'Topic without a UU playlist',
3617 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3618 'info_dict': {
3619 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3620 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3621 },
3622 'expected_warnings': [
3623 'A channel/user page was given',
3624 'The URL does not have a videos tab',
3625 'Falling back to channel URL',
3626 ],
3627 'playlist_mincount': 9,
abcdd12b 3628 }, {
3629 'note': 'Youtube music Album',
3630 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3631 'info_dict': {
3632 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3633 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3634 },
3635 'playlist_count': 50,
47193e02 3636 }, {
3637 'note': 'unlisted single video playlist',
3638 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3639 'info_dict': {
3640 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3641 'uploader': 'colethedj',
3642 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3643 'title': 'yt-dlp unlisted playlist test',
3644 'availability': 'unlisted'
3645 },
3646 'playlist_count': 1,
29f7c58a 3647 }]
3648
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE reports as suitable is rejected here; every
    other URL is judged by the parent class' ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3653
3654 def _extract_channel_id(self, webpage):
3655 channel_id = self._html_search_meta(
3656 'channelId', webpage, 'channel id', default=None)
3657 if channel_id:
3658 return channel_id
3659 channel_url = self._html_search_meta(
3660 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3661 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3662 'twitter:app:url:googleplay'), webpage, 'channel url')
3663 return self._search_regex(
3664 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3665 channel_url, 'channel id')
15f6397c 3666
8bdd16b4 3667 @staticmethod
cd7c66cf 3668 def _extract_basic_item_renderer(item):
3669 # Modified from _extract_grid_item_renderer
201c1459 3670 known_basic_renderers = (
3671 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3672 )
3673 for key, renderer in item.items():
201c1459 3674 if not isinstance(renderer, dict):
cd7c66cf 3675 continue
201c1459 3676 elif key in known_basic_renderers:
3677 return renderer
3678 elif key.startswith('grid') and key.endswith('Renderer'):
3679 return renderer
8bdd16b4 3680
def _grid_entries(self, grid_renderer):
    """Yield one result per usable item in ``grid_renderer['items']``.

    Each item is reduced to its renderer dict and then dispatched on the
    first identifier it carries: playlist id, video id, channel id, or,
    failing those, the navigation endpoint URL.  Items that are not dicts
    or have no recognised renderer are skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        if renderer.get('playlistId'):
            # playlist
            playlist_id = renderer['playlistId']
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % renderer['channelId'],
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(ep_url), video_title=title)
8bdd16b4 3720
3d3dddc9 3721 def _shelf_entries_from_content(self, shelf_renderer):
3722 content = shelf_renderer.get('content')
3723 if not isinstance(content, dict):
8bdd16b4 3724 return
cd7c66cf 3725 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3726 if renderer:
3727 # TODO: add support for nested playlists so each shelf is processed
3728 # as separate playlist
3729 # TODO: this includes only first N items
3730 for entry in self._grid_entries(renderer):
3731 yield entry
3732 renderer = content.get('horizontalListRenderer')
3733 if renderer:
3734 # TODO
3735 pass
8bdd16b4 3736
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield results for a shelf: its own link first, then its content.

    When the shelf has an endpoint URL, a result pointing at that URL is
    yielded.  With *skip_channels* set, a shelf whose URL contains
    ``/channels?`` ends the generator without yielding anything.  The
    entries found in the shelf content are yielded afterwards.
    """
    shelf_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', shelf_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = '/channels?' in shelf_url
        if skip_channels and links_to_channels:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 3753
8bdd16b4 3754 def _playlist_entries(self, video_list_renderer):
3755 for content in video_list_renderer['contents']:
3756 if not isinstance(content, dict):
3757 continue
3758 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3759 if not isinstance(renderer, dict):
3760 continue
3761 video_id = renderer.get('videoId')
3762 if not video_id:
3763 continue
3764 yield self._extract_video(renderer)
07aeced6 3765
def _rich_entries(self, rich_grid_renderer):
    """Yield the extracted video held at ``content.videoRenderer``, if any.

    Nothing is yielded when that renderer is missing, is not a dict, or
    has no truthy ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3773
8bdd16b4 3774 def _video_entry(self, video_renderer):
3775 video_id = video_renderer.get('videoId')
3776 if video_id:
3777 return self._extract_video(video_renderer)
dacb3a86 3778
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a single post thread.

    In order: the attached video (if any), the attached playlist (if any),
    then every link in the post text that YoutubeIE accepts, leaving out a
    link to the already-attached video.  Yields nothing when the thread
    has no ``backstagePostRenderer``.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        # The attached video was already yielded above
        if linked_video_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 3814
8bdd16b4 3815 def _post_thread_continuation_entries(self, post_thread_continuation):
3816 contents = post_thread_continuation.get('contents')
3817 if not isinstance(contents, list):
3818 return
3819 for content in contents:
3820 renderer = content.get('backstagePostThreadRenderer')
3821 if not isinstance(renderer, dict):
3822 continue
3823 for entry in self._post_thread_entries(renderer):
3824 yield entry
07aeced6 3825
39ed931e 3826 r''' # unused
3827 def _rich_grid_entries(self, contents):
3828 for content in contents:
3829 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3830 if video_renderer:
3831 entry = self._video_entry(video_renderer)
3832 if entry:
3833 yield entry
3834 '''
def _entries(self, tab, item_id, account_syncid, ytcfg):
    """Yield every entry of a tab, then keep following continuations.

    The tab's initial content is walked first; afterwards each continuation
    is requested through `_extract_response` until no continuation is left
    or a response contains nothing that is recognised.
    """
    # One-slot list acting as a mutable cell shared with extract_entries
    # (Python 2 does not support nonlocal).
    continuation_list = [None]

    # Handlers for the renderers found inside an itemSectionRenderer
    section_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
    }

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(content, dict):
                continue
            section = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not section:
                rich_item = content.get('richItemRenderer')
                if rich_item:
                    for entry in self._rich_entries(rich_item):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue

            for section_item in try_get(section, lambda x: x['contents'], list) or []:
                if not isinstance(section_item, dict):
                    continue
                # Only the first recognised renderer of each item is used
                for key, renderer in section_item.items():
                    handler = section_handlers.get(key)
                    if handler is None:
                        continue
                    for entry in handler(renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(section)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    visitor_data = None

    # Handlers for responses carrying 'continuationContents'
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for 'appendContinuationItemsAction' responses: the handler and
    # the key under which the continuation items are wrapped for it
    item_handlers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response does not carry one
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in continuation_handlers:
                continue
            continuation_renderer = value
            continuation_list[0] = None
            for entry in continuation_handlers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The first item decides which handler processes the whole list
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key in first_item:
            if key not in item_handlers:
                continue
            handler, wrapper_key = item_handlers[key]
            video_items_renderer = {wrapper_key: continuation_items}
            continuation_list[0] = None
            for entry in handler(video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if not video_items_renderer:
            break
9558dcec 3951
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose 'selected' flag is True.

    Both 'tabRenderer' and 'expandableTabRenderer' are considered.
    Raises ExtractorError when no tab is selected.
    """
    renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next((r for r in renderers if r.get('selected') is True), None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected
b82f815f 3960
@classmethod
def _extract_uploader(cls, data):
    """Return uploader fields read from the playlist sidebar.

    The result holds whichever of 'uploader', 'uploader_id' and
    'uploader_url' could be determined; it is empty when the sidebar has
    no video owner.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}
    fields = {
        'uploader': owner.get('text'),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    # Unknown values are dropped instead of being reported as None
    return {k: v for k, v in fields.items() if v is not None}
8bdd16b4 3975
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tab-based page.

    Metadata is read from the channel metadata renderer, falling back to
    the playlist metadata renderer; the entries come from the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    playlist_id = title = description = None
    channel_name = channel_url = channel_id = None
    tags, raw_thumbnails = [], []

    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(metadata_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only the thumbnails that carry a usable URL
    thumbnails = []
    for thumb in raw_thumbnails:
        thumbnail_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        # Not a channel page: take the uploader from the sidebar instead
        metadata.update(self._extract_uploader(data))
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    ytcfg = self.extract_ytcfg(item_id, webpage)
    entries = self._entries(
        selected_tab, playlist_id,
        self._extract_account_syncid(ytcfg, data), ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 4049
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages on demand.

    Each page is a playlist renderer; the following page is requested from
    the 'next' endpoint. Iteration stops when a page has no videos, adds
    nothing after the last video already yielded, contains the first video
    again, or when the response holds no playlist.
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The endpoint may be absent; fall back to an empty dict so that the
        # defaults below are used instead of raising AttributeError on None.
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No further page: stop here rather than handing None to
            # _playlist_entries on the next iteration.
            return
cd7c66cf 4084
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Return the result for a playlist shown next to a video.

    When the playlist links to a URL other than `url`, extraction is
    delegated to that URL; otherwise it is handled as a mix playlist.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4102
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        if not try_get(dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if label:
            # The label of the selected entry is treated like a badge
            badge_labels.add(label.lower())
            break

    is_private = is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'unlisted':
            is_unlisted = True
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
4134
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first non-empty `info_renderer` among the sidebar items.

    Returns None when the sidebar is missing or no item carries a truthy
    value of `expected_type` under that key.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next((candidate for candidate in candidates if candidate), None)
4143
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_id = params = None
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

    ytcfg = self.extract_ytcfg(item_id, webpage)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=visitor_data)
    # Defaults are used when the button was missing or lacked the values
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4180
def _extract_webpage(self, url, item_id):
    """Download the webpage and its initial data, retrying while incomplete.

    The page is re-downloaded up to 'extractor_retries' additional times
    when the initial data has neither 'contents' nor 'currentVideoEndpoint'.
    At least one attempt is always made.

    @returns (webpage, data)
    Raises ExtractorError when the data is still incomplete after the last
    retry.
    """
    retries = self.get_param('extractor_retries', 3)
    last_error = 'Incomplete yt initial data received'
    # itertools.count rather than range(): retries is compared, never
    # iterated, so no assumption is made that it is a finite int.
    for count in itertools.count():
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self.extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            return webpage, data
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
4202
@staticmethod
def _smuggle_data(entries, data):
    """Yield each entry, smuggling `data` into its 'url' when `data` is truthy."""
    for item in entries:
        if data:
            # The entry is modified in place before being passed on
            item['url'] = smuggle_url(item['url'], data)
        yield item
4209
def _real_extract(self, url):
    """Extract `url`, passing any smuggled data on to the resulting entries."""
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4218
# _VALID_URL split into the part before the tab, the tab itself (only looked
# for when the URL matched as a channel) and everything after it
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)

def __real_extract(self, url, smuggled_data):
    """Normalise the URL, download the page and dispatch on its contents.

    The page is handled as a tab page, a playlist next to a video, or a
    single video, in that order. Raises ExtractorError when none applies.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    channel_redirect = 'no-youtube-channel-redirect' not in compat_opts

    def get_mobj(url):
        # Groups that did not take part in the match are reported as ''
        return self._url_re.match(url).groupdict('')

    mobj = get_mobj(url)
    pre, post = mobj['pre'], mobj['post']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
        elif id_prefix == 'MP':
            # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
            item_id = self._search_regex(
                r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                'playlist id')
            pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and channel_redirect:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if channel_redirect:
            requested_tab = mobj['tab']
            if requested_tab == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab == '/videos' and tab_name.lower() != requested_tab[1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (requested_tab[1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Switch to the playlist only once it is known to be usable
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (requested_tab[1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data, only_once=True)

    # data may have been replaced above, so look the tabs up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 4335
c5e8d7af 4336
class YoutubePlaylistIE(InfoExtractor):
    # Matches bare playlist IDs and URLs carrying a `list=` parameter, then
    # hands the playlist over to YoutubeTabIE
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that YoutubeTabIE accepts or that name a video (`v=`)."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        names_video = bool(parse_qs(url).get('v', [None])[0])
        return not names_video and super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the input into a canonical playlist URL for YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # A bare playlist ID has no query string of its own
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4421
4422
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a playlist ID
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL for YoutubeTabIE."""
        match = self._match_valid_url(url)
        playlist_id = match.group('playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': match.group('id'),
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4461
4462
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" shorthand for a user page
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Point YoutubeTabIE at the user's page."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4476
b05654f0 4477
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" style shorthands for the "LL" playlist
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Point YoutubeTabIE at the "LL" playlist."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4495
4496
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to `n` videos for `query`, requesting further pages as needed."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            # The continuation of the previous page becomes part of this request
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                continuation = continuation or self._extract_continuation({'contents': [slr_content]})

                section_items = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list) or []
                for content in section_items:
                    video = content.get('videoRenderer') if isinstance(content, dict) else None
                    # Only renderers that carry a video ID are results
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query, query)
75dff0ee 4564
c9ae7b95 4565
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as YoutubeSearchIE, with a fixed 'params' value added to
    # every request
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4571
c9ae7b95 4572
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own _VALID_URL."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by the URL's query string."""
        params = parse_qs(url)
        search_terms = params.get('search_query') or params.get('q')
        # The 'sp' value of the URL is sent along as the search params
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(search_terms[0], self._MAX_RESULTS)
3462ffa8 4599
4600
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derived from the feed name supplied by the subclass
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Point YoutubeTabIE at this feed's page."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4617
4618
class YoutubeWatchLaterIE(InfoExtractor):
    """Shortcut extractor mapping ":ytwatchlater" to the "WL" playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        # Delegate to YoutubeTabIE with the fixed watch-later playlist URL
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4631
4632
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "recommended" feed (bare youtube.com URL or ":ytrec")."""

    _FEED_NAME = 'recommended'
    # NOTE(review): login is switched off here although IE_DESC below still
    # says "requires authentication" - confirm which one is intended.
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]
1ed5b5c9 4648
1ed5b5c9 4649
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "subscriptions" feed, reachable via ":ytsubs"."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Accepts ":ytsub", ":ytsubs", ":ytsubscription" and ":ytsubscriptions"
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]
1ed5b5c9 4661
1ed5b5c9 4662
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "history" feed, reachable via ":ythis" / ":ythistory"."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]
1ed5b5c9
JMF
4671
4672
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch YouTube watch URLs that carry no video id and explain the likely cause.

    The pattern matches watch URLs whose query string is empty or holds only
    one of a few non-identifying parameters, as well as attribution links with
    a lone "a" parameter. Extraction always fails with an expected error that
    tells the user to quote the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError describing the quoting mistake."""
        # The suggested command must name this program (yt-dlp); the previous
        # wording pointed users at youtube-dl and had a doubled space.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4720
4721
class YoutubeClipIE(InfoExtractor):
    """Placeholder extractor for youtube.com/clip/ URLs."""

    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        # Warn the user, then pass the unchanged URL on to the 'Generic' extractor
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        return self.url_result(url, 'Generic')
4730
4731
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose "v" parameter is only 1 to 10 characters long."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        # Always fails: report the short id and the URL it was taken from
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)