]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[Mediaklikk] Add Extractor (#867)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
2d30521a 37 float_or_none,
11f9be09 38 format_field,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
641ad5d8 41 is_html,
94278f72 42 mimetype2ext,
9c0d7f49 43 network_exceptions,
11f9be09 44 orderedSet,
6310acf5 45 parse_codecs,
49bd8c66 46 parse_count,
7c80519c 47 parse_duration,
7ea65411 48 parse_iso8601,
4dfbf869 49 parse_qs,
dca3ff4a 50 qualities,
c0ac49bc 51 remove_end,
3995d37d 52 remove_start,
cf7e015f 53 smuggle_url,
dbdaaa23 54 str_or_none,
c93d53f5 55 str_to_int,
7c365c21 56 traverse_obj,
556dbe7f 57 try_get,
c5e8d7af
PH
58 unescapeHTML,
59 unified_strdate,
cf7e015f 60 unsmuggle_url,
8bdd16b4 61 update_url_query,
21c340b8 62 url_or_none,
fe93e2c4 63 urljoin,
7c365c21 64 variadic,
c5e8d7af
PH
65)
66
5f6a1245 67
# Static configuration for each known Innertube client, keyed by client name.
# any clients starting with _ cannot be explicitly requested by the user
#
# Entries may omit 'INNERTUBE_API_KEY' and 'INNERTUBE_HOST'; build_innertube_clients()
# fills in defaults for those, adds 'hl' to the client context and a 'priority' to each entry.
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
    },
    # No explicit API key here: the default one is filled in by build_innertube_clients()
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26
    },
    # No explicit API key here: the default one is filled in by build_innertube_clients()
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
206
207
def build_innertube_clients():
    """Complete INNERTUBE_CLIENTS in place.

    Every entry gets a default API key, a default host, an 'hl' context value
    and a numeric 'priority'. For each base client an additional
    '<client>_agegate' entry is registered as a deep copy with an embed
    screen and a third-party context.
    """
    embed_context = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    families = ('android', 'web', 'ios', 'mweb')
    rank = qualities(families[::-1])

    # Iterate over a snapshot because agegate entries are added while looping
    for name, config in list(INNERTUBE_CLIENTS.items()):
        config.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        config.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        config['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        family = name.split('_', 1)[0]
        config['priority'] = 10 * rank(family)

        if name.endswith('_embedded') and name not in families:
            config['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            config['priority'] -= 2
            continue
        if name not in families:
            config['priority'] -= 3
            continue

        # Base client: derive its age-gate variant from the completed config
        agegate = copy.deepcopy(config)
        agegate['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
        agegate['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
        agegate['priority'] -= 1
        INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate
231
232
233build_innertube_clients()
234
235
de7f3446 236class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 237 """Provide base functions for Youtube extractors"""
e00eb564 238
    # Regex alternation of YouTube URL path names; its consumers are outside this view
    # (presumably used to avoid mistaking these paths for channel names — TODO confirm)
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Regex for playlist IDs: either a known prefix followed by at least 10 ID
    # characters, or one of the bare special IDs (RDMM, WL, LL, LM)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # The block below is a bare string expression, not executed code: it keeps the
    # old login endpoints around for reference only
    r'''  # Unused since login is broken
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
    '''
d0ba5587 260
b2e8bc1b 261 def _login(self):
83317f69 262 """
263 Attempt to log in to YouTube.
264 True is returned if successful or skipped.
265 False is returned if login failed.
266
267 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
268 """
9d5d4d64 269
270 def warn(message):
271 self.report_warning(message)
272
273 # username+password login is broken
982ee69a
MB
274 if (self._LOGIN_REQUIRED
275 and self.get_param('cookiefile') is None
276 and self.get_param('cookiesfrombrowser') is None):
9d5d4d64 277 self.raise_login_required(
278 'Login details are needed to download this content', method='cookies')
68217024 279 username, password = self._get_login_info()
9d5d4d64 280 if username:
281 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
282 return
9d5d4d64 283
2d6659b9 284 # Everything below this is broken!
285 r'''
b2e8bc1b
JMF
286 # No authentication to be performed
287 if username is None:
a06916d9 288 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 289 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 290 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 291 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 292 return True
b2e8bc1b 293
7cc3570e
PH
294 login_page = self._download_webpage(
295 self._LOGIN_URL, None,
69ea8ca4
PH
296 note='Downloading login page',
297 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
298 if login_page is False:
299 return
b2e8bc1b 300
1212e997 301 login_form = self._hidden_inputs(login_page)
c5e8d7af 302
e00eb564
S
303 def req(url, f_req, note, errnote):
304 data = login_form.copy()
305 data.update({
306 'pstMsg': 1,
307 'checkConnection': 'youtube',
308 'checkedDomains': 'youtube',
309 'hl': 'en',
310 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 311 'f.req': json.dumps(f_req),
e00eb564
S
312 'flowName': 'GlifWebSignIn',
313 'flowEntry': 'ServiceLogin',
baf67a60
S
314 # TODO: reverse actual botguard identifier generation algo
315 'bgRequest': '["identifier",""]',
041bc3ad 316 })
e00eb564
S
317 return self._download_json(
318 url, None, note=note, errnote=errnote,
319 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
320 fatal=False,
321 data=urlencode_postdata(data), headers={
322 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
323 'Google-Accounts-XSRF': 1,
324 })
325
3995d37d
S
326 lookup_req = [
327 username,
328 None, [], None, 'US', None, None, 2, False, True,
329 [
330 None, None,
331 [2, 1, None, 1,
332 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
333 None, [], 4],
334 1, [None, None, []], None, None, None, True
335 ],
336 username,
337 ]
338
e00eb564 339 lookup_results = req(
3995d37d 340 self._LOOKUP_URL, lookup_req,
e00eb564
S
341 'Looking up account info', 'Unable to look up account info')
342
343 if lookup_results is False:
344 return False
041bc3ad 345
3995d37d
S
346 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
347 if not user_hash:
348 warn('Unable to extract user hash')
349 return False
350
351 challenge_req = [
352 user_hash,
353 None, 1, None, [1, None, None, None, [password, None, True]],
354 [
355 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
356 1, [None, None, []], None, None, None, True
357 ]]
83317f69 358
3995d37d
S
359 challenge_results = req(
360 self._CHALLENGE_URL, challenge_req,
361 'Logging in', 'Unable to log in')
83317f69 362
3995d37d 363 if challenge_results is False:
e00eb564 364 return
83317f69 365
3995d37d
S
366 login_res = try_get(challenge_results, lambda x: x[0][5], list)
367 if login_res:
368 login_msg = try_get(login_res, lambda x: x[5], compat_str)
369 warn(
370 'Unable to login: %s' % 'Invalid password'
371 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
372 return False
373
374 res = try_get(challenge_results, lambda x: x[0][-1], list)
375 if not res:
376 warn('Unable to extract result entry')
377 return False
378
9a6628aa
S
379 login_challenge = try_get(res, lambda x: x[0][0], list)
380 if login_challenge:
381 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
382 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
383 # SEND_SUCCESS - TFA code has been successfully sent to phone
384 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 385 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
386 if status == 'QUOTA_EXCEEDED':
387 warn('Exceeded the limit of TFA codes, try later')
388 return False
389
390 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
391 if not tl:
392 warn('Unable to extract TL')
393 return False
394
395 tfa_code = self._get_tfa_info('2-step verification code')
396
397 if not tfa_code:
398 warn(
399 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
400 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
401 return False
402
403 tfa_code = remove_start(tfa_code, 'G-')
404
405 tfa_req = [
406 user_hash, None, 2, None,
407 [
408 9, None, None, None, None, None, None, None,
409 [None, tfa_code, True, 2]
410 ]]
411
412 tfa_results = req(
413 self._TFA_URL.format(tl), tfa_req,
414 'Submitting TFA code', 'Unable to submit TFA code')
415
416 if tfa_results is False:
417 return False
418
419 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
420 if tfa_res:
421 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
422 warn(
423 'Unable to finish TFA: %s' % 'Invalid TFA code'
424 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
425 return False
426
427 check_cookie_url = try_get(
428 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
429 else:
430 CHALLENGES = {
431 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
432 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
433 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
434 }
435 challenge = CHALLENGES.get(
436 challenge_str,
437 '%s returned error %s.' % (self.IE_NAME, challenge_str))
438 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
439 return False
3995d37d
S
440 else:
441 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
442
443 if not check_cookie_url:
444 warn('Unable to extract CheckCookie URL')
445 return False
e00eb564
S
446
447 check_cookie_results = self._download_webpage(
3995d37d
S
448 check_cookie_url, None, 'Checking cookie', fatal=False)
449
450 if check_cookie_results is False:
451 return False
e00eb564 452
3995d37d
S
453 if 'https://myaccount.google.com/' not in check_cookie_results:
454 warn('Unable to log in')
b2e8bc1b 455 return False
e00eb564 456
b2e8bc1b 457 return True
2d6659b9 458 '''
b2e8bc1b 459
cce889b9 460 def _initialize_consent(self):
461 cookies = self._get_cookies('https://www.youtube.com/')
462 if cookies.get('__Secure-3PSID'):
463 return
464 consent_id = None
465 consent = cookies.get('CONSENT')
466 if consent:
467 if 'YES' in consent.value:
468 return
469 consent_id = self._search_regex(
470 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
471 if not consent_id:
472 consent_id = random.randint(100, 999)
473 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 474
b2e8bc1b 475 def _real_initialize(self):
cce889b9 476 self._initialize_consent()
b2e8bc1b
JMF
477 if self._downloader is None:
478 return
b2e8bc1b
JMF
479 if not self._login():
480 return
c5e8d7af 481
    # Captures the JSON object assigned to ytInitialData (plain or window["ytInitialData"] form)
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Captures the JSON object assigned to ytInitialPlayerResponse
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Text expected right after the JSON object; appended to the patterns above so the
    # non-greedy capture does not stop at an earlier '};' inside the data
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 485
    def _get_default_ytcfg(self, client='web'):
        """Return a deep copy of the built-in config for @client, safe for the caller to mutate."""
        return copy.deepcopy(INNERTUBE_CLIENTS[client])
109dd3b2 488
    def _get_innertube_host(self, client='web'):
        """Return the 'INNERTUBE_HOST' configured for @client in INNERTUBE_CLIENTS."""
        return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
109dd3b2 491
000c15a4 492 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
109dd3b2 493 # try_get but with fallback to default ytcfg client values when present
494 _func = lambda y: try_get(y, getter, expected_type)
495 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
496
    def _extract_client_name(self, ytcfg, default_client='web'):
        """Return the client name from @ytcfg, or from the default config of @default_client.

        Looks at 'INNERTUBE_CLIENT_NAME' and at the client context's 'clientName'.
        """
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
109dd3b2 501
314ee305 502 @staticmethod
11f9be09 503 def _extract_session_index(*data):
504 for ytcfg in data:
505 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
506 if session_index is not None:
507 return session_index
314ee305 508
    def _extract_client_version(self, ytcfg, default_client='web'):
        """Return the client version from @ytcfg, or from the default config of @default_client.

        Looks at 'INNERTUBE_CLIENT_VERSION' and at the client context's 'clientVersion'.
        """
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
109dd3b2 513
    def _extract_api_key(self, ytcfg=None, default_client='web'):
        """Return 'INNERTUBE_API_KEY' from @ytcfg, or from the default config of @default_client."""
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
516
000c15a4 517 def _extract_context(self, ytcfg=None, default_client='web'):
109dd3b2 518 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
519 context = _get_context(ytcfg)
520 if context:
521 return context
522
523 context = _get_context(self._get_default_ytcfg(default_client))
524 if not ytcfg:
525 return context
526
527 # Recreate the client context (required)
528 context['client'].update({
529 'clientVersion': self._extract_client_version(ytcfg, default_client),
530 'clientName': self._extract_client_name(ytcfg, default_client),
531 })
532 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
533 if visitor_data:
534 context['client']['visitorData'] = visitor_data
535 return context
536
    # Cache used by _generate_sapisidhash_header: None = cookies not inspected yet,
    # False = no SAPISID cookie available, otherwise the cookie value
    _SAPISID = None
538
109dd3b2 539 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 540 time_now = round(time.time())
cf87314d 541 if self._SAPISID is None:
542 yt_cookies = self._get_cookies('https://www.youtube.com')
543 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
544 # See: https://github.com/yt-dlp/yt-dlp/issues/393
545 sapisid_cookie = dict_get(
546 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
547 if sapisid_cookie and sapisid_cookie.value:
548 self._SAPISID = sapisid_cookie.value
549 self.write_debug('Extracted SAPISID cookie')
550 # SAPISID cookie is required if not already present
551 if not yt_cookies.get('SAPISID'):
552 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
553 self._set_cookie(
554 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
555 else:
556 self._SAPISID = False
557 if not self._SAPISID:
558 return None
1974e99f 559 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
560 sapisidhash = hashlib.sha1(
cf87314d 561 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
1974e99f 562 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
563
564 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 565 note='Downloading API JSON', errnote='Unable to download API page',
000c15a4 566 context=None, api_key=None, api_hostname=None, default_client='web'):
f4f751af 567
109dd3b2 568 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 569 data.update(query)
11f9be09 570 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 571 real_headers.update({'content-type': 'application/json'})
572 if headers:
573 real_headers.update(headers)
545cc85d 574 return self._download_json(
109dd3b2 575 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 576 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 577 data=json.dumps(data).encode('utf8'), headers=real_headers,
578 query={'key': api_key or self._extract_api_key()})
579
11f9be09 580 def extract_yt_initial_data(self, video_id, webpage):
8bdd16b4 581 return self._parse_json(
582 self._search_regex(
29f7c58a 583 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 584 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 585 video_id)
0c148415 586
a1c5d2ca 587 def _extract_identity_token(self, webpage, item_id):
11f9be09 588 if not webpage:
589 return None
590 ytcfg = self.extract_ytcfg(item_id, webpage)
a1c5d2ca
M
591 if ytcfg:
592 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
593 if token:
594 return token
595 return self._search_regex(
596 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
597 'identity token', default=None)
598
599 @staticmethod
fe93e2c4 600 def _extract_account_syncid(*args):
8ea3f7b9 601 """
602 Extract syncId required to download private playlists of secondary channels
fe93e2c4 603 @params response and/or ytcfg
8ea3f7b9 604 """
fe93e2c4 605 for data in args:
606 # ytcfg includes channel_syncid if on secondary channel
607 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
608 if delegated_sid:
609 return delegated_sid
610 sync_ids = (try_get(
611 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
612 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
613 if len(sync_ids) >= 2 and sync_ids[1]:
614 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
615 # and just "user_syncid||" for primary channel. We only want the channel_syncid
616 return sync_ids[0]
a1c5d2ca 617
11f9be09 618 def extract_ytcfg(self, video_id, webpage):
8c54a305 619 if not webpage:
620 return {}
29f7c58a 621 return self._parse_json(
622 self._search_regex(
623 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 624 default='{}'), video_id, fatal=False) or {}
625
11f9be09 626 def generate_api_headers(
627 self, ytcfg=None, identity_token=None, account_syncid=None,
000c15a4 628 visitor_data=None, api_hostname=None, default_client='web', session_index=None):
11f9be09 629 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
f4f751af 630 headers = {
109dd3b2 631 'X-YouTube-Client-Name': compat_str(
11f9be09 632 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
633 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
109dd3b2 634 'Origin': origin
f4f751af 635 }
2d6659b9 636 if not visitor_data and ytcfg:
637 visitor_data = try_get(
11f9be09 638 self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
f4f751af 639 if identity_token:
109dd3b2 640 headers['X-Youtube-Identity-Token'] = identity_token
f4f751af 641 if account_syncid:
642 headers['X-Goog-PageId'] = account_syncid
314ee305 643 if session_index is None and ytcfg:
644 session_index = self._extract_session_index(ytcfg)
645 if account_syncid or session_index is not None:
646 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
f4f751af 647 if visitor_data:
109dd3b2 648 headers['X-Goog-Visitor-Id'] = visitor_data
649 auth = self._generate_sapisidhash_header(origin)
f4f751af 650 if auth is not None:
651 headers['Authorization'] = auth
109dd3b2 652 headers['X-Origin'] = origin
f4f751af 653 return headers
29f7c58a 654
2d6659b9 655 @staticmethod
656 def _build_api_continuation_query(continuation, ctp=None):
657 query = {
658 'continuation': continuation
659 }
660 # TODO: Inconsistency with clickTrackingParams.
661 # Currently we have a fixed ctp contained within context (from ytcfg)
662 # and a ctp in root query for continuation.
663 if ctp:
664 query['clickTracking'] = {'clickTrackingParams': ctp}
665 return query
666
2d6659b9 667 @classmethod
668 def _extract_next_continuation_data(cls, renderer):
669 next_continuation = try_get(
670 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
671 lambda x: x['continuation']['reloadContinuationData']), dict)
672 if not next_continuation:
673 return
674 continuation = next_continuation.get('continuation')
675 if not continuation:
676 return
677 ctp = next_continuation.get('clickTrackingParams')
fe93e2c4 678 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 679
680 @classmethod
681 def _extract_continuation_ep_data(cls, continuation_ep: dict):
682 if isinstance(continuation_ep, dict):
683 continuation = try_get(
684 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
685 if not continuation:
686 return
687 ctp = continuation_ep.get('clickTrackingParams')
fe93e2c4 688 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 689
690 @classmethod
691 def _extract_continuation(cls, renderer):
692 next_continuation = cls._extract_next_continuation_data(renderer)
693 if next_continuation:
694 return next_continuation
fe93e2c4 695
2d6659b9 696 contents = []
697 for key in ('contents', 'items'):
698 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
fe93e2c4 699
2d6659b9 700 for content in contents:
701 if not isinstance(content, dict):
702 continue
703 continuation_ep = try_get(
704 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
705 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
706 dict)
707 continuation = cls._extract_continuation_ep_data(continuation_ep)
708 if continuation:
709 return continuation
710
fe93e2c4 711 @classmethod
712 def _extract_alerts(cls, data):
109dd3b2 713 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
714 if not isinstance(alert_dict, dict):
715 continue
716 for alert in alert_dict.values():
717 alert_type = alert.get('type')
718 if not alert_type:
719 continue
052e1350 720 message = cls._get_text(alert, 'text')
109dd3b2 721 if message:
722 yield alert_type, message
723
c0ac49bc 724 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
109dd3b2 725 errors = []
726 warnings = []
727 for alert_type, alert_message in alerts:
641ad5d8 728 if alert_type.lower() == 'error' and fatal:
109dd3b2 729 errors.append([alert_type, alert_message])
730 else:
731 warnings.append([alert_type, alert_message])
732
733 for alert_type, alert_message in (warnings + errors[:-1]):
c0ac49bc 734 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
109dd3b2 735 if errors:
736 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
737
    def _extract_and_report_alerts(self, data, *args, **kwargs):
        """Extract the alerts from @data and report them; extra arguments go to _report_alerts."""
        return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
740
47193e02 741 def _extract_badges(self, renderer: dict):
742 badges = set()
743 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
744 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
745 if label:
746 badges.add(label.lower())
747 return badges
748
    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """Return the first non-empty text found in @data, or None.

        @param path_list  paths into @data to try in order; with no path, @data itself is used
        @param max_runs   if set, only this many leading 'runs' are joined

        A text object is read either from its 'simpleText' or by joining the
        'text' of its 'runs'; a plain list is treated as the runs themselves.
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # Only a path containing ... or a list/tuple key is treated as producing
                # several candidates; any other result is wrapped as a single candidate
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                if not runs and isinstance(item, list):
                    runs = item

                # max_runs of None (or 0) keeps every run
                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text
47193e02 770
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """Call the Innertube API with retries and return the JSON response.

        @param check_get_keys  keys of which at least one must be present (truthy) in the
                               response; otherwise the data counts as incomplete and is retried
        @param fatal           if False, failures are reported as warnings and None is returned

        The number of retries is taken from the 'extractor_retries' parameter (default 3).
        Retried are network errors other than HTTP 403/429, alerts mentioning an
        'unknown error' and incomplete responses.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
            try:
                # Always fatal here so that failures surface as ExtractorError and can be
                # classified below; the caller's `fatal` is applied afterwards
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                        # Non-HTML error body: rewind and look for an API error message to show
                        e.cause.seek(0)
                        yt_error = try_get(
                            self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e.msg)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False, only_once=True)
                except ExtractorError as e:
                    # YouTube servers may return errors we want to retry on in a 200 OK response
                    # See: https://github.com/yt-dlp/yt-dlp/issues/839
                    if 'unknown error' in e.msg.lower():
                        last_error = e.msg
                        # NOTE(review): on the final attempt this `continue` ends the loop and the
                        # erroneous response is returned without raising — confirm this is intended
                        continue
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
841
9297939e 842 @staticmethod
843 def is_music_url(url):
844 return re.match(r'https?://music\.youtube\.com/', url) is not None
845
def _extract_video(self, renderer):
    """Turn a video renderer mapping into a ``url``-type result for YoutubeIE.

    The video ID is read from ``renderer['videoId']`` and is used both as the
    result ``id`` and as its ``url``. Title, description, duration, view count
    and uploader are filled in from the renderer's text fields.
    """
    vid = renderer.get('videoId')
    video_title = self._get_text(renderer, 'title')
    snippet = self._get_text(renderer, 'descriptionSnippet')

    # Length text is looked up under 'lengthText' and under the thumbnail
    # overlay time-status renderer (both paths are handed to _get_text).
    length_text = self._get_text(
        renderer, 'lengthText',
        ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    seconds = parse_duration(length_text)

    # Strip all whitespace, then keep only the leading run of digits/commas.
    raw_views = self._get_text(renderer, 'viewCountText') or ''
    compact_views = re.sub(r'\s', '', raw_views)
    leading_number = self._search_regex(
        r'^([\d,]+)', compact_views, 'view count', default=None)
    views = str_to_int(leading_number)

    owner = self._get_text(renderer, 'ownerText', 'shortBylineText')

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': vid,
        'url': vid,
        'title': video_title,
        'description': snippet,
        'duration': seconds,
        'view_count': views,
        'uploader': owner,
    }
870
0c148415 871
360e1ca5 872class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 873 IE_DESC = 'YouTube.com'
bc2ca1bb 874 _INVIDIOUS_SITES = (
875 # invidious-redirect websites
876 r'(?:www\.)?redirect\.invidious\.io',
877 r'(?:(?:www|dev)\.)?invidio\.us',
878 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
879 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 880 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 881 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 882 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 883 # youtube-dl invidious instances list
884 r'(?:(?:www|no)\.)?invidiou\.sh',
885 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
886 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 887 r'(?:www\.)?invidious\.mastodon\.host',
888 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 889 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 890 r'(?:www\.)?invidious\.tinfoil-hat\.net',
891 r'(?:www\.)?invidious\.himiko\.cloud',
892 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 893 r'(?:www\.)?invidious\.tube',
894 r'(?:www\.)?invidiou\.site',
895 r'(?:www\.)?invidious\.site',
896 r'(?:www\.)?invidious\.xyz',
897 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 898 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 899 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 900 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 901 r'(?:www\.)?tube\.poal\.co',
902 r'(?:www\.)?tube\.connect\.cafe',
903 r'(?:www\.)?vid\.wxzm\.sx',
904 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 905 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 906 r'(?:www\.)?yewtu\.be',
907 r'(?:www\.)?yt\.elukerio\.org',
908 r'(?:www\.)?yt\.lelux\.fi',
909 r'(?:www\.)?invidious\.ggc-project\.de',
910 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 911 r'(?:www\.)?ytprivate\.com',
912 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 913 r'(?:www\.)?invidious\.toot\.koeln',
914 r'(?:www\.)?invidious\.fdn\.fr',
915 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 916 r'(?:www\.)?invidious\.namazso\.eu',
917 r'(?:www\.)?invidious\.silkky\.cloud',
918 r'(?:www\.)?invidious\.exonip\.de',
919 r'(?:www\.)?invidious\.riverside\.rocks',
920 r'(?:www\.)?invidious\.blamefran\.net',
921 r'(?:www\.)?invidious\.moomoo\.de',
922 r'(?:www\.)?ytb\.trom\.tf',
923 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 924 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
925 r'(?:www\.)?qklhadlycap4cnod\.onion',
926 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
927 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
928 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
929 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
930 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
931 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 932 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
933 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
934 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
935 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 936 )
cb7dfeea 937 _VALID_URL = r"""(?x)^
c5e8d7af 938 (
edb53e2d 939 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 940 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
941 (?:www\.)?deturl\.com/www\.youtube\.com|
942 (?:www\.)?pwnyoutube\.com|
943 (?:www\.)?hooktube\.com|
944 (?:www\.)?yourepeat\.com|
945 tube\.majestyc\.net|
946 %(invidious)s|
947 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
948 (?:.*?\#/)? # handle anchor (#/) redirect urls
949 (?: # the various things that can precede the ID:
8fc54b12 950 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
c5e8d7af 951 |(?: # or the v= param in all its forms
f7000f3a 952 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 953 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 954 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
955 v=
956 )
f4b05232 957 ))
cbaed4bb
S
958 |(?:
959 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
960 vid\.plus| # or vid.plus/xxxx
961 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 962 %(invidious)s
cbaed4bb 963 )/
edb53e2d 964 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 965 )
c5e8d7af 966 )? # all until now is optional -> you can pass the naked ID
201c1459 967 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 968 (?(1).+)? # if we found the ID, everything can follow
9297939e 969 (?:\#|$)""" % {
bc2ca1bb 970 'invidious': '|'.join(_INVIDIOUS_SITES),
971 }
e40c758c 972 _PLAYER_INFO_RE = (
cc2db878 973 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
974 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 975 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 976 )
2c62dc26 977 _formats = {
c2d3cb4c 978 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
979 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
980 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
981 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
982 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
983 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
984 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
985 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 986 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 987 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
988 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
989 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
990 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
991 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
992 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 993 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 994 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
995 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 996
997
998 # 3D videos
c2d3cb4c 999 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1000 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1001 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1002 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 1003 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1004 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1005 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 1006
96fb5605 1007 # Apple HTTP Live Streaming
11f12195 1008 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 1009 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1010 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1011 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1012 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1013 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 1014 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1015 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
1016
1017 # DASH mp4 video
d23028a8
S
1018 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1019 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1020 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1021 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1022 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 1023 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
1024 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1025 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1026 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1027 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1028 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1029 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1030
f6f1fc92 1031 # Dash mp4 audio
d23028a8
S
1032 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1033 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1034 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1035 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1036 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1037 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1038 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1039
1040 # Dash webm
d23028a8
S
1041 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1042 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1043 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1044 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1045 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1046 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1047 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1048 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1049 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1050 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1051 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1052 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1053 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1054 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1055 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1056 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1057 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1058 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1059 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1060 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1061 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1062 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1063
1064 # Dash webm audio
d23028a8
S
1065 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1066 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1067
0857baad 1068 # Dash webm audio with opus inside
d23028a8
S
1069 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1070 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1071 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1072
ce6b9a2d
PH
1073 # RTMP (unnamed)
1074 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1075
1076 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1077 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1078 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1079 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1080 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1081 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1082 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1083 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1084 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1085 }
29f7c58a 1086 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1087
fd5c4aab
S
1088 _GEO_BYPASS = False
1089
78caa52a 1090 IE_NAME = 'youtube'
2eb88d95
PH
1091 _TESTS = [
1092 {
2d3d2997 1093 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1094 'info_dict': {
1095 'id': 'BaW_jenozKc',
1096 'ext': 'mp4',
3867038a 1097 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1098 'uploader': 'Philipp Hagemeister',
1099 'uploader_id': 'phihag',
ec85ded8 1100 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
1101 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1102 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1103 'upload_date': '20121002',
3867038a 1104 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 1105 'categories': ['Science & Technology'],
3867038a 1106 'tags': ['youtube-dl'],
556dbe7f 1107 'duration': 10,
dbdaaa23 1108 'view_count': int,
3e7c1224
PH
1109 'like_count': int,
1110 'dislike_count': int,
7c80519c 1111 'start_time': 1,
297a564b 1112 'end_time': 9,
2eb88d95 1113 }
0e853ca4 1114 },
fccd3771 1115 {
4bc3a23e
PH
1116 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1117 'note': 'Embed-only video (#1746)',
1118 'info_dict': {
1119 'id': 'yZIXLfi8CZQ',
1120 'ext': 'mp4',
1121 'upload_date': '20120608',
1122 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1123 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1124 'uploader': 'SET India',
94bfcd23 1125 'uploader_id': 'setindia',
ec85ded8 1126 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1127 'age_limit': 18,
545cc85d 1128 },
1129 'skip': 'Private video',
fccd3771 1130 },
11b56058 1131 {
8bdd16b4 1132 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1133 'note': 'Use the first video ID in the URL',
1134 'info_dict': {
1135 'id': 'BaW_jenozKc',
1136 'ext': 'mp4',
3867038a 1137 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1138 'uploader': 'Philipp Hagemeister',
1139 'uploader_id': 'phihag',
ec85ded8 1140 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1141 'upload_date': '20121002',
3867038a 1142 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1143 'categories': ['Science & Technology'],
3867038a 1144 'tags': ['youtube-dl'],
556dbe7f 1145 'duration': 10,
dbdaaa23 1146 'view_count': int,
11b56058
PM
1147 'like_count': int,
1148 'dislike_count': int,
34a7de29
S
1149 },
1150 'params': {
1151 'skip_download': True,
1152 },
11b56058 1153 },
dd27fd17 1154 {
2d3d2997 1155 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1156 'note': '256k DASH audio (format 141) via DASH manifest',
1157 'info_dict': {
1158 'id': 'a9LDPn-MO4I',
1159 'ext': 'm4a',
1160 'upload_date': '20121002',
1161 'uploader_id': '8KVIDEO',
ec85ded8 1162 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1163 'description': '',
1164 'uploader': '8KVIDEO',
1165 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1166 },
4bc3a23e
PH
1167 'params': {
1168 'youtube_include_dash_manifest': True,
1169 'format': '141',
4919603f 1170 },
de3c7fe0 1171 'skip': 'format 141 not served anymore',
dd27fd17 1172 },
8bdd16b4 1173 # DASH manifest with encrypted signature
1174 {
1175 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1176 'info_dict': {
1177 'id': 'IB3lcPjvWLA',
1178 'ext': 'm4a',
1179 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1180 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1181 'duration': 244,
1182 'uploader': 'AfrojackVEVO',
1183 'uploader_id': 'AfrojackVEVO',
1184 'upload_date': '20131011',
cc2db878 1185 'abr': 129.495,
8bdd16b4 1186 },
1187 'params': {
1188 'youtube_include_dash_manifest': True,
1189 'format': '141/bestaudio[ext=m4a]',
1190 },
1191 },
65c2fde2 1192 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1193 {
65c2fde2 1194 'note': 'Embed allowed age-gate video',
2d3d2997 1195 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1196 'info_dict': {
1197 'id': 'HtVdAasjOgU',
1198 'ext': 'mp4',
1199 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1200 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1201 'duration': 142,
c522adb1
JMF
1202 'uploader': 'The Witcher',
1203 'uploader_id': 'WitcherGame',
ec85ded8 1204 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1205 'upload_date': '20140605',
34952f09 1206 'age_limit': 18,
c522adb1
JMF
1207 },
1208 },
65c2fde2 1209 {
1210 'note': 'Age-gate video with embed allowed in public site',
1211 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1212 'info_dict': {
1213 'id': 'HsUATh_Nc2U',
1214 'ext': 'mp4',
1215 'title': 'Godzilla 2 (Official Video)',
1216 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1217 'upload_date': '20200408',
1218 'uploader_id': 'FlyingKitty900',
1219 'uploader': 'FlyingKitty',
1220 'age_limit': 18,
1221 },
1222 },
1223 {
1224 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1225 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1226 'info_dict': {
1227 'id': 'Tq92D6wQ1mg',
1228 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1229 'ext': 'mp4',
1230 'upload_date': '20191227',
65c2fde2 1231 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1232 'uploader': 'Projekt Melody',
1233 'description': 'md5:17eccca93a786d51bc67646756894066',
1234 'age_limit': 18,
1235 },
1236 },
1237 {
1238 'note': 'Non-Agegated non-embeddable video',
1239 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1240 'info_dict': {
1241 'id': 'MeJVWBSsPAY',
1242 'ext': 'mp4',
1243 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1244 'uploader': 'Herr Lurik',
1245 'uploader_id': 'st3in234',
1246 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1247 'upload_date': '20130730',
1248 },
1249 },
1250 {
1251 'note': 'Non-bypassable age-gated video',
1252 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1253 'only_matching': True,
1254 },
8bdd16b4 1255 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1256 # YouTube Red ad is not captured for creator
1257 {
1258 'url': '__2ABJjxzNo',
1259 'info_dict': {
1260 'id': '__2ABJjxzNo',
1261 'ext': 'mp4',
1262 'duration': 266,
1263 'upload_date': '20100430',
1264 'uploader_id': 'deadmau5',
1265 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1266 'creator': 'deadmau5',
1267 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1268 'uploader': 'deadmau5',
1269 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1270 'alt_title': 'Some Chords',
8bdd16b4 1271 },
1272 'expected_warnings': [
1273 'DASH manifest missing',
1274 ]
1275 },
067aa17e 1276 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1277 {
1278 'url': 'lqQg6PlCWgI',
1279 'info_dict': {
1280 'id': 'lqQg6PlCWgI',
1281 'ext': 'mp4',
556dbe7f 1282 'duration': 6085,
90227264 1283 'upload_date': '20150827',
cbe2bd91 1284 'uploader_id': 'olympic',
ec85ded8 1285 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1286 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1287 'uploader': 'Olympics',
cbe2bd91
PH
1288 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1289 },
1290 'params': {
1291 'skip_download': 'requires avconv',
e52a40ab 1292 }
cbe2bd91 1293 },
6271f1ca
PH
1294 # Non-square pixels
1295 {
1296 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1297 'info_dict': {
1298 'id': '_b-2C3KPAM0',
1299 'ext': 'mp4',
1300 'stretched_ratio': 16 / 9.,
556dbe7f 1301 'duration': 85,
6271f1ca
PH
1302 'upload_date': '20110310',
1303 'uploader_id': 'AllenMeow',
ec85ded8 1304 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1305 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1306 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1307 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1308 },
06b491eb
S
1309 },
1310 # url_encoded_fmt_stream_map is empty string
1311 {
1312 'url': 'qEJwOuvDf7I',
1313 'info_dict': {
1314 'id': 'qEJwOuvDf7I',
f57b7835 1315 'ext': 'webm',
06b491eb
S
1316 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1317 'description': '',
1318 'upload_date': '20150404',
1319 'uploader_id': 'spbelect',
1320 'uploader': 'Наблюдатели Петербурга',
1321 },
1322 'params': {
1323 'skip_download': 'requires avconv',
e323cf3f
S
1324 },
1325 'skip': 'This live event has ended.',
06b491eb 1326 },
067aa17e 1327 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1328 {
1329 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1330 'info_dict': {
1331 'id': 'FIl7x6_3R5Y',
eb6793ba 1332 'ext': 'webm',
da77d856
S
1333 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1334 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1335 'duration': 220,
da77d856
S
1336 'upload_date': '20150625',
1337 'uploader_id': 'dorappi2000',
ec85ded8 1338 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1339 'uploader': 'dorappi2000',
eb6793ba 1340 'formats': 'mincount:31',
da77d856 1341 },
eb6793ba 1342 'skip': 'not actual anymore',
2ee8f5d8 1343 },
8a1a26ce
YCH
1344 # DASH manifest with segment_list
1345 {
1346 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1347 'md5': '8ce563a1d667b599d21064e982ab9e31',
1348 'info_dict': {
1349 'id': 'CsmdDsKjzN8',
1350 'ext': 'mp4',
17ee98e1 1351 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1352 'uploader': 'Airtek',
1353 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1354 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1355 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1356 },
1357 'params': {
1358 'youtube_include_dash_manifest': True,
1359 'format': '135', # bestvideo
be49068d
S
1360 },
1361 'skip': 'This live event has ended.',
2ee8f5d8 1362 },
cf7e015f
S
1363 {
1364 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1365 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1366 'info_dict': {
545cc85d 1367 'id': 'jvGDaLqkpTg',
1368 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1369 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1370 },
1371 'playlist': [{
1372 'info_dict': {
545cc85d 1373 'id': 'jvGDaLqkpTg',
cf7e015f 1374 'ext': 'mp4',
545cc85d 1375 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1376 'description': 'md5:e03b909557865076822aa169218d6a5d',
1377 'duration': 10643,
1378 'upload_date': '20161111',
1379 'uploader': 'Team PGP',
1380 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1381 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1382 },
1383 }, {
1384 'info_dict': {
545cc85d 1385 'id': '3AKt1R1aDnw',
cf7e015f 1386 'ext': 'mp4',
545cc85d 1387 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1388 'description': 'md5:e03b909557865076822aa169218d6a5d',
1389 'duration': 10991,
1390 'upload_date': '20161111',
1391 'uploader': 'Team PGP',
1392 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1393 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1394 },
1395 }, {
1396 'info_dict': {
545cc85d 1397 'id': 'RtAMM00gpVc',
cf7e015f 1398 'ext': 'mp4',
545cc85d 1399 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1400 'description': 'md5:e03b909557865076822aa169218d6a5d',
1401 'duration': 10995,
1402 'upload_date': '20161111',
1403 'uploader': 'Team PGP',
1404 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1405 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1406 },
1407 }, {
1408 'info_dict': {
545cc85d 1409 'id': '6N2fdlP3C5U',
cf7e015f 1410 'ext': 'mp4',
545cc85d 1411 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1412 'description': 'md5:e03b909557865076822aa169218d6a5d',
1413 'duration': 10990,
1414 'upload_date': '20161111',
1415 'uploader': 'Team PGP',
1416 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1417 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1418 },
1419 }],
1420 'params': {
1421 'skip_download': True,
1422 },
65c2fde2 1423 'skip': 'Not multifeed anymore',
cbaed4bb 1424 },
f9f49d87 1425 {
067aa17e 1426 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1427 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1428 'info_dict': {
1429 'id': 'gVfLd0zydlo',
1430 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1431 },
1432 'playlist_count': 2,
be49068d 1433 'skip': 'Not multifeed anymore',
f9f49d87 1434 },
cbaed4bb 1435 {
2d3d2997 1436 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1437 'only_matching': True,
0e49d9a6 1438 },
6d4fc66b 1439 {
2d3d2997 1440 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1441 'only_matching': True,
1442 },
0e49d9a6 1443 {
067aa17e 1444 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1445 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1446 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1447 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1448 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1449 'info_dict': {
1450 'id': 'lsguqyKfVQg',
1451 'ext': 'mp4',
1452 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1453 'alt_title': 'Dark Walk',
0e49d9a6 1454 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1455 'duration': 133,
0e49d9a6
LL
1456 'upload_date': '20151119',
1457 'uploader_id': 'IronSoulElf',
ec85ded8 1458 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1459 'uploader': 'IronSoulElf',
11f9be09 1460 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1461 'track': 'Dark Walk',
1462 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1463 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1464 },
1465 'params': {
1466 'skip_download': True,
1467 },
1468 },
61f92af1 1469 {
067aa17e 1470 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1471 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1472 'only_matching': True,
1473 },
313dfc45
LL
1474 {
1475 # Video with yt:stretch=17:0
1476 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1477 'info_dict': {
1478 'id': 'Q39EVAstoRM',
1479 'ext': 'mp4',
1480 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1481 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1482 'upload_date': '20151107',
1483 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1484 'uploader': 'CH GAMER DROID',
1485 },
1486 'params': {
1487 'skip_download': True,
1488 },
be49068d 1489 'skip': 'This video does not exist.',
313dfc45 1490 },
201c1459 1491 {
1492 # Video with incomplete 'yt:stretch=16:'
1493 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1494 'only_matching': True,
1495 },
7caf9830
S
1496 {
1497 # Video licensed under Creative Commons
1498 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1499 'info_dict': {
1500 'id': 'M4gD1WSo5mA',
1501 'ext': 'mp4',
1502 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1503 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1504 'duration': 721,
7caf9830
S
1505 'upload_date': '20150127',
1506 'uploader_id': 'BerkmanCenter',
ec85ded8 1507 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1508 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1509 'license': 'Creative Commons Attribution license (reuse allowed)',
1510 },
1511 'params': {
1512 'skip_download': True,
1513 },
1514 },
fd050249
S
1515 {
1516 # Channel-like uploader_url
1517 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1518 'info_dict': {
1519 'id': 'eQcmzGIKrzg',
1520 'ext': 'mp4',
1521 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1522 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1523 'duration': 4060,
fd050249 1524 'upload_date': '20151119',
eb6793ba 1525 'uploader': 'Bernie Sanders',
fd050249 1526 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1527 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1528 'license': 'Creative Commons Attribution license (reuse allowed)',
1529 },
1530 'params': {
1531 'skip_download': True,
1532 },
1533 },
040ac686
S
1534 {
1535 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1536 'only_matching': True,
7f29cf54
S
1537 },
1538 {
067aa17e 1539 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1540 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1541 'only_matching': True,
6496ccb4
S
1542 },
1543 {
1544 # Rental video preview
1545 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1546 'info_dict': {
1547 'id': 'uGpuVWrhIzE',
1548 'ext': 'mp4',
1549 'title': 'Piku - Trailer',
1550 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1551 'upload_date': '20150811',
1552 'uploader': 'FlixMatrix',
1553 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1554 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1555 'license': 'Standard YouTube License',
1556 },
1557 'params': {
1558 'skip_download': True,
1559 },
eb6793ba 1560 'skip': 'This video is not available.',
022a5d66 1561 },
12afdc2a
S
1562 {
1563 # YouTube Red video with episode data
1564 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1565 'info_dict': {
1566 'id': 'iqKdEhx-dD4',
1567 'ext': 'mp4',
1568 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1569 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1570 'duration': 2085,
12afdc2a
S
1571 'upload_date': '20170118',
1572 'uploader': 'Vsauce',
1573 'uploader_id': 'Vsauce',
1574 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1575 'series': 'Mind Field',
1576 'season_number': 1,
1577 'episode_number': 1,
1578 },
1579 'params': {
1580 'skip_download': True,
1581 },
1582 'expected_warnings': [
1583 'Skipping DASH manifest',
1584 ],
1585 },
c7121fa7
S
1586 {
1587 # The following content has been identified by the YouTube community
1588 # as inappropriate or offensive to some audiences.
1589 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1590 'info_dict': {
1591 'id': '6SJNVb0GnPI',
1592 'ext': 'mp4',
1593 'title': 'Race Differences in Intelligence',
1594 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1595 'duration': 965,
1596 'upload_date': '20140124',
1597 'uploader': 'New Century Foundation',
1598 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1599 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1600 },
1601 'params': {
1602 'skip_download': True,
1603 },
545cc85d 1604 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1605 },
022a5d66
S
1606 {
1607 # itag 212
1608 'url': '1t24XAntNCY',
1609 'only_matching': True,
fd5c4aab
S
1610 },
1611 {
1612 # geo restricted to JP
1613 'url': 'sJL6WA-aGkQ',
1614 'only_matching': True,
1615 },
cd5a74a2
S
1616 {
1617 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1618 'only_matching': True,
1619 },
bc2ca1bb 1620 {
1621 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1622 'only_matching': True,
1623 },
1624 {
1625 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1626 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1627 'only_matching': True,
1628 },
825cd268
RA
1629 {
1630 # DRM protected
1631 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1632 'only_matching': True,
4fe54c12
S
1633 },
1634 {
1635 # Video with unsupported adaptive stream type formats
1636 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1637 'info_dict': {
1638 'id': 'Z4Vy8R84T1U',
1639 'ext': 'mp4',
1640 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1641 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1642 'duration': 433,
1643 'upload_date': '20130923',
1644 'uploader': 'Amelia Putri Harwita',
1645 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1646 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1647 'formats': 'maxcount:10',
1648 },
1649 'params': {
1650 'skip_download': True,
1651 'youtube_include_dash_manifest': False,
1652 },
5429d6a9 1653 'skip': 'not actual anymore',
5caabd3c 1654 },
1655 {
822b9d9c 1656 # Youtube Music Auto-generated description
5caabd3c 1657 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1658 'info_dict': {
1659 'id': 'MgNrAu2pzNs',
1660 'ext': 'mp4',
1661 'title': 'Voyeur Girl',
1662 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1663 'upload_date': '20190312',
5429d6a9
S
1664 'uploader': 'Stephen - Topic',
1665 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1666 'artist': 'Stephen',
1667 'track': 'Voyeur Girl',
1668 'album': 'it\'s too much love to know my dear',
1669 'release_date': '20190313',
1670 'release_year': 2019,
1671 },
1672 'params': {
1673 'skip_download': True,
1674 },
1675 },
66b48727
RA
1676 {
1677 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1678 'only_matching': True,
1679 },
011e75e6
S
1680 {
1681 # invalid -> valid video id redirection
1682 'url': 'DJztXj2GPfl',
1683 'info_dict': {
1684 'id': 'DJztXj2GPfk',
1685 'ext': 'mp4',
1686 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1687 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1688 'upload_date': '20090125',
1689 'uploader': 'Prochorowka',
1690 'uploader_id': 'Prochorowka',
1691 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1692 'artist': 'Panjabi MC',
1693 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1694 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1695 },
1696 'params': {
1697 'skip_download': True,
1698 },
545cc85d 1699 'skip': 'Video unavailable',
ea74e00b
DP
1700 },
1701 {
1702 # empty description results in an empty string
1703 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1704 'info_dict': {
1705 'id': 'x41yOUIvK2k',
1706 'ext': 'mp4',
1707 'title': 'IMG 3456',
1708 'description': '',
1709 'upload_date': '20170613',
1710 'uploader_id': 'ElevageOrVert',
1711 'uploader': 'ElevageOrVert',
1712 },
1713 'params': {
1714 'skip_download': True,
1715 },
1716 },
a0566bbf 1717 {
29f7c58a 1718 # with '};' inside yt initial data (see [1])
1719 # see [2] for an example with '};' inside ytInitialPlayerResponse
1720 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1721 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1722 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1723 'info_dict': {
1724 'id': 'CHqg6qOn4no',
1725 'ext': 'mp4',
1726 'title': 'Part 77 Sort a list of simple types in c#',
1727 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1728 'upload_date': '20130831',
1729 'uploader_id': 'kudvenkat',
1730 'uploader': 'kudvenkat',
1731 },
1732 'params': {
1733 'skip_download': True,
1734 },
1735 },
29f7c58a 1736 {
1737 # another example of '};' in ytInitialData
1738 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1739 'only_matching': True,
1740 },
1741 {
1742 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1743 'only_matching': True,
1744 },
545cc85d 1745 {
cc2db878 1746 # https://github.com/ytdl-org/youtube-dl/pull/28094
1747 'url': 'OtqTfy26tG0',
1748 'info_dict': {
1749 'id': 'OtqTfy26tG0',
1750 'ext': 'mp4',
1751 'title': 'Burn Out',
1752 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1753 'upload_date': '20141120',
1754 'uploader': 'The Cinematic Orchestra - Topic',
1755 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1756 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1757 'artist': 'The Cinematic Orchestra',
1758 'track': 'Burn Out',
1759 'album': 'Every Day',
1760 'release_data': None,
1761 'release_year': None,
1762 },
1763 'params': {
1764 'skip_download': True,
1765 },
545cc85d 1766 },
bc2ca1bb 1767 {
1768 # controversial video, only works with bpctr when authenticated with cookies
1769 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1770 'only_matching': True,
1771 },
a1a7907b 1772 {
1773 # controversial video, requires bpctr/contentCheckOk
1774 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1775 'info_dict': {
1776 'id': 'SZJvDhaSDnc',
1777 'ext': 'mp4',
1778 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1779 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1780 'uploader': 'CBS This Morning',
11f9be09 1781 'uploader_id': 'CBSThisMorning',
a1a7907b 1782 'upload_date': '20140716',
1783 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1784 }
1785 },
f7ad7160 1786 {
1787 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1788 'url': 'cBvYw8_A0vQ',
1789 'info_dict': {
1790 'id': 'cBvYw8_A0vQ',
1791 'ext': 'mp4',
1792 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1793 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1794 'upload_date': '20201120',
1795 'uploader': 'Walk around Japan',
1796 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1797 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1798 },
1799 'params': {
1800 'skip_download': True,
1801 },
0fb983f6 1802 }, {
1803 # Has multiple audio streams
1804 'url': 'WaOKSUlf4TM',
1805 'only_matching': True
9297939e 1806 }, {
1807 # Requires Premium: has format 141 when requested using YTM url
1808 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1809 'only_matching': True
1810 }, {
120916da 1811 # multiple subtitles with same lang_code
1812 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1813 'only_matching': True,
109dd3b2 1814 }, {
1815 # Force use android client fallback
1816 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1817 'info_dict': {
1818 'id': 'YOelRv7fMxY',
11f9be09 1819 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1820 'ext': '3gp',
1821 'upload_date': '20210624',
1822 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1823 'uploader': 'colinfurze',
11f9be09 1824 'uploader_id': 'colinfurze',
109dd3b2 1825 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1826 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1827 },
1828 'params': {
1829 'format': '17', # 3gp format available on android
1830 'extractor_args': {'youtube': {'player_client': ['android']}},
1831 },
120916da 1832 },
109dd3b2 1833 {
1834 # Skip download of additional client configs (remix client config in this case)
1835 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1836 'only_matching': True,
1837 'params': {
1838 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1839 },
8fc54b12 1840 }, {
1841 # shorts
1842 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1843 'only_matching': True,
1844 },
2eb88d95
PH
1845 ]
1846
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs whose query string carries a non-empty ``list`` parameter are
    rejected here; every other URL is judged by the parent class.
    """
    # parse_qs is already imported from ..utils at the top of the module,
    # so no function-scope import is needed.
    qs = parse_qs(url)
    if qs.get('list', [None])[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1855
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and its per-instance player caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # (player_url, signature cache id) -> signature function
    self._player_cache = {}
    # player id -> downloaded player JS code
    self._code_cache = {}
e0df6211 1860
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return an absolute player JS URL taken from *ytcfg* or *webpage*.

    ``ytcfg['PLAYER_JS_URL']`` is preferred; the webpage is only searched
    when that yields nothing. Returns None when no URL can be found.
    """
    player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if not player_url and webpage:
        player_url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not player_url:
        return None

    # Protocol-relative URL: pin it to https
    if player_url.startswith('//'):
        return 'https:' + player_url
    # Already absolute
    if re.match(r'https?://', player_url):
        return player_url
    # Anything else is resolved against the YouTube origin
    return compat_urlparse.urljoin('https://www.youtube.com', player_url)
1875
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # The id is the length of every dot-separated part, joined by dots
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1879
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern found in *player_url*.

    Raises ExtractorError when none of the patterns matches.
    """
    for player_re in cls._PLAYER_INFO_RE:
        id_m = re.search(player_re, player_url)
        if id_m:
            return id_m.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c 1889
109dd3b2 1890 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1891 player_id = self._extract_player_info(player_url)
1892 if player_id not in self._code_cache:
1893 self._code_cache[player_id] = self._download_webpage(
1894 player_url, video_id, fatal=fatal,
1895 note='Downloading player ' + player_id,
1896 errnote='Download of %s failed' % player_url)
1897 return player_id in self._code_cache
1898
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature shaped like *example_sig*.

    A previously stored index list from the 'youtube-sigfuncs' cache is used
    when present; otherwise the function is parsed out of the player code
    and its index list is stored for next time. Returns None when the player
    could not be loaded.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        return lambda s: ''.join(s[i] for i in stored_spec)

    if not self._load_player(video_id, player_url):
        return None

    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Run the function over a string of distinct characters so that the
    # output reveals which input index ends up at each output position.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    scrambled = sig_func(probe)
    new_spec = [ord(c) for c in scrambled]

    self._downloader.cache.store('youtube-sigfuncs', func_id, new_spec)
    return sig_func
83799698 1921
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    *func* is run over a probe string of distinct characters; the resulting
    index list is compressed into slice/index expressions and shown with
    ``self.to_screen``.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            if start == 0:
                starts = ''
            else:
                starts = str(start)
            if end + step >= 0:
                ends = ':%d' % (end + step)
            else:
                ends = ':'
            if step == 1:
                steps = ''
            else:
                steps = ':%d' % step
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for cur, prev in zip(idxs[1:], idxs[:-1]):
            delta = cur - prev
            if step is None:
                if delta in (-1, 1):
                    # A run of consecutive indices begins at prev
                    step = delta
                    start = prev
                else:
                    yield 's[%d]' % prev
            elif delta != step:
                # The current run ended at prev
                yield _genslice(start, prev, step)
                step = None
        if step is None:
            yield 's[%d]' % cur
        else:
            yield _genslice(start, cur, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    scrambled = func(probe)
    index_spec = [ord(c) for c in scrambled]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    # NOTE(review): the whitespace before "return" in the template below is
    # reproduced as seen; confirm it against the canonical file.
    template = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n')
    code = template % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1960
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in *jscode* and wrap it as a Python callable.

    The function name is found with the first matching pattern below, the
    function is extracted with JSInterpreter, and the returned callable
    takes the signature string and passes it as the single JS argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # This pattern used to be listed twice in a row; the exact duplicate
         # could never match after the first copy failed, so it was dropped.
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])
1984
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""

    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # One signature function is kept per (player URL, signature shape)
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        # Any failure is reported as an ExtractorError that carries the
        # full traceback text in its message.
        tb = traceback.format_exc()
        raise ExtractorError(
            'Signature extraction failed: ' + tb, cause=e)
e0df6211 2006
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ``ytcfg['STS']`` is used when it yields a truthy integer; otherwise the
    value is searched for in the player code. Without a player_url this
    raises ExtractorError when *fatal* is set, else warns and returns None.
    """
    sts = None
    if isinstance(ytcfg, dict):
        sts = int_or_none(ytcfg.get('STS'))
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    if self._load_player(video_id, player_url, fatal=fatal):
        player_id = self._extract_player_info(player_url)
        code = self._code_cache.get(player_id)
        # A non-fatal failed download may leave no usable code in the cache;
        # only search when there is actual text to search in.
        if code:
            sts = int_or_none(self._search_regex(
                r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
                'JS player signature timestamp', group='sts', fatal=fatal))
    return sts
2031
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL found in *player_responses*.

    Emits a warning and does nothing else when no such URL is present.
    """
    playback_url = traverse_obj(
        player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]
    cpn = ''.join(cpn_chars)

    qs['ver'] = ['2']
    qs['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(qs, True)
    playback_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2057
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embed URLs / video ids found in *webpage*, in discovery order."""
    # Embedded YouTube player
    # NOTE(review): `embedSWF\(?:\s*` reads as an optional "(" followed by a
    # literal ":"; possibly `embedSWF\(\s*` was intended - confirm before
    # changing, the pattern is kept as-is here.
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = []
    for mobj in re.finditer(embed_re, webpage):
        entries.append(unescapeHTML(mobj.group('url')))

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(unescapeHTML(lazy_id) for lazy_id in lazy_ids)

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for groups in re.findall(importer_re, webpage):
        # findall yields one tuple per match; the video id is the last group
        entries.append(groups[-1])

    return entries
2089
@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``_extract_urls(webpage)``, or None when it is empty."""
    urls = YoutubeIE._extract_urls(webpage)
    if not urls:
        return None
    return urls[0]
2094
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of ``_VALID_URL`` matched against *url*.

    Raises ExtractorError when *url* does not match.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af 2101
def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar inside *data*."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        # The start offset is stored in milliseconds
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)
2116
2117 def _extract_chapters_from_engagement_panel(self, data, duration):
2118 content_list = traverse_obj(
8bdd16b4 2119 data,
7c365c21 2120 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
da503b7a 2121 expected_type=list, default=[])
052e1350 2122 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2123 chapter_title = lambda chapter: self._get_text(chapter, 'title')
7c365c21 2124
2125 return next((
2126 filter(None, (
2127 self._extract_chapters(
2128 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2129 chapter_time, chapter_title, duration)
2130 for contents in content_list
2131 ))), [])
2132
2133 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2134 chapters = []
7c365c21 2135 last_chapter = {'start_time': 0}
2136 for idx, chapter in enumerate(chapter_list or []):
2137 title = chapter_title(chapter)
84213ea8
S
2138 start_time = chapter_time(chapter)
2139 if start_time is None:
2140 continue
7c365c21 2141 last_chapter['end_time'] = start_time
2142 if start_time < last_chapter['start_time']:
2143 if idx == 1:
2144 chapters.pop()
2145 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2146 else:
2147 self.report_warning(f'Invalid start time for chapter "{title}"')
2148 continue
2149 last_chapter = {'start_time': start_time, 'title': title}
2150 chapters.append(last_chapter)
2151 last_chapter['end_time'] = duration
84213ea8
S
2152 return chapters
2153
545cc85d 2154 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2155 return self._parse_json(self._search_regex(
2156 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2157 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 2158
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns None when the text has fewer than three space-separated parts
    or when datetime_from_str raises ValueError.
    """
    parts = time_text.split(' ')
    if len(parts) < 3:
        return None
    amount, unit = parts[0], parts[1]
    try:
        return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
    except ValueError:
        return None
d92f5d5a 2171
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a flat comment dict.

    @param comment_renderer  dict describing one comment
    @param parent            id of the parent comment, if any
    @returns                 comment dict, or None when the renderer has no
                             'commentId'
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    time_text_dt = self.parse_time_text(time_text)
    # Must be bound even when the time text is missing or cannot be parsed;
    # otherwise building the result below raises UnboundLocalError.
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    # Favorited when the action buttons contain a 'creatorHeart' entry
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2209
2210 def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
2d6659b9 2211 ytcfg, video_id, parent=None, comment_counts=None):
2212
2213 def extract_header(contents):
2214 _total_comments = 0
2215 _continuation = None
2216 for content in contents:
2217 comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
fe93e2c4 2218 expected_comment_count = parse_count(self._get_text(
052e1350 2219 comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
fe93e2c4 2220
2d6659b9 2221 if expected_comment_count:
fe93e2c4 2222 comment_counts[1] = expected_comment_count
2223 self.to_screen('Downloading ~%d comments' % expected_comment_count)
2d6659b9 2224 _total_comments = comment_counts[1]
2225 sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
2226 comment_sort_index = int(sort_mode_str != 'top') # 1 = new, 0 = top
2227
2228 sort_menu_item = try_get(
2229 comments_header_renderer,
2230 lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2231 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2232
2233 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2234 if not _continuation:
2235 continue
2236
2237 sort_text = sort_menu_item.get('title')
2238 if isinstance(sort_text, compat_str):
2239 sort_text = sort_text.lower()
2240 else:
2241 sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2242 self.to_screen('Sorting comments by %s' % sort_text)
2243 break
2244 return _total_comments, _continuation
a1c5d2ca 2245
2d6659b9 2246 def extract_thread(contents):
a1c5d2ca
M
2247 if not parent:
2248 comment_counts[2] = 0
2249 for content in contents:
2250 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2251 comment_renderer = try_get(
2252 comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
2253 content, (lambda x: x['commentRenderer'], dict))
2254
2255 if not comment_renderer:
2256 continue
2257 comment = self._extract_comment(comment_renderer, parent)
2258 if not comment:
2259 continue
2260 comment_counts[0] += 1
2261 yield comment
2262 # Attempt to get the replies
2263 comment_replies_renderer = try_get(
2264 comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2265
2266 if comment_replies_renderer:
2267 comment_counts[2] += 1
2268 comment_entries_iter = self._comment_entries(
f4f751af 2269 comment_replies_renderer, identity_token, account_syncid, ytcfg,
2d6659b9 2270 video_id, parent=comment.get('id'), comment_counts=comment_counts)
a1c5d2ca
M
2271
2272 for reply_comment in comment_entries_iter:
2273 yield reply_comment
2274
2d6659b9 2275 # YouTube comments have a max depth of 2
2276 max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
2277 if max_depth == 1 and parent:
2278 return
a1c5d2ca
M
2279 if not comment_counts:
2280 # comment so far, est. total comments, current comment thread #
2281 comment_counts = [0, 0, 0]
a1c5d2ca 2282
2d6659b9 2283 continuation = self._extract_continuation(root_continuation_data)
fe93e2c4 2284 if continuation and len(continuation['continuation']) < 27:
2d6659b9 2285 self.write_debug('Detected old API continuation token. Generating new API compatible token.')
2286 continuation_token = self._generate_comment_continuation(video_id)
fe93e2c4 2287 continuation = self._build_api_continuation_query(continuation_token, None)
2d6659b9 2288
2289 visitor_data = None
2290 is_first_continuation = parent is None
a1c5d2ca
M
2291
2292 for page_num in itertools.count(0):
2293 if not continuation:
2294 break
11f9be09 2295 headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
2d6659b9 2296 comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
2297 if page_num == 0:
2298 if is_first_continuation:
2299 note_prefix = 'Downloading comment section API JSON'
a1c5d2ca 2300 else:
2d6659b9 2301 note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
2302 comment_counts[2], comment_prog_str)
2303 else:
2304 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2305 ' ' if parent else '', ' replies' if parent else '',
2306 page_num, comment_prog_str)
2307
2308 response = self._extract_response(
fe93e2c4 2309 item_id=None, query=continuation,
2d6659b9 2310 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2311 check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
a1c5d2ca
M
2312 if not response:
2313 break
f4f751af 2314 visitor_data = try_get(
2315 response,
2316 lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
2317 compat_str) or visitor_data
a1c5d2ca 2318
2d6659b9 2319 continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))
a1c5d2ca 2320
2d6659b9 2321 continuation = None
2322 if isinstance(continuation_contents, list):
2323 for continuation_section in continuation_contents:
2324 if not isinstance(continuation_section, dict):
2325 continue
2326 continuation_items = try_get(
2327 continuation_section,
2328 (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
2329 lambda x: x['appendContinuationItemsAction']['continuationItems']),
2330 list) or []
2331 if is_first_continuation:
2332 total_comments, continuation = extract_header(continuation_items)
2333 if total_comments:
2334 yield total_comments
2335 is_first_continuation = False
2336 if continuation:
2337 break
2338 continue
2339 count = 0
2340 for count, entry in enumerate(extract_thread(continuation_items)):
2341 yield entry
2342 continuation = self._extract_continuation({'contents': continuation_items})
2343 if continuation:
2344 # Sometimes YouTube provides a continuation without any comments
2345 # In most cases we end up just downloading these with very little comments to come.
2346 if count == 0:
2347 if not parent:
2348 self.report_warning('No comments received - assuming end of comments')
2349 continuation = None
a1c5d2ca
M
2350 break
2351
2d6659b9 2352 # Deprecated response structure
2353 elif isinstance(continuation_contents, dict):
2354 known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
2355 for key, continuation_renderer in continuation_contents.items():
2356 if key not in known_continuation_renderers:
2357 continue
2358 if not isinstance(continuation_renderer, dict):
2359 continue
2360 if is_first_continuation:
2361 header_continuation_items = [continuation_renderer.get('header') or {}]
2362 total_comments, continuation = extract_header(header_continuation_items)
2363 if total_comments:
2364 yield total_comments
2365 is_first_continuation = False
2366 if continuation:
2367 break
a1c5d2ca 2368
2d6659b9 2369 # Sometimes YouTube provides a continuation without any comments
2370 # In most cases we end up just downloading these with very little comments to come.
2371 count = 0
2372 for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
2373 yield entry
2374 continuation = self._extract_continuation(continuation_renderer)
2375 if count == 0:
2376 if not parent:
2377 self.report_warning('No comments received - assuming end of comments')
2378 continuation = None
2379 break
a1c5d2ca 2380
2d6659b9 2381 @staticmethod
2382 def _generate_comment_continuation(video_id):
2383 """
2384 Generates initial comment section continuation token from given video id
2385 """
2386 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2387 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2388 new_continuation_intlist = list(itertools.chain.from_iterable(
2389 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2390 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2391
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Collects the comments produced by self._comment_entries() for every
    'itemSectionRenderer' found in contents, stopping once the
    'max_comments' extractor argument is reached or the user interrupts.

    @param ytcfg     ytcfg dict; a deep copy is modified, not the original
    @param video_id  Id of the video the comments belong to
    @param contents  List of renderer dicts (anything that is not a list yields no comments)
    @param webpage   Watch page, used for extracting the identity token
    @returns         dict with the keys 'comments' and 'comment_count'
    """
    renderer_keys = ('itemSectionRenderer',)
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    # Force English regardless of account setting to prevent parsing issues
    # See: https://github.com/yt-dlp/yt-dlp/issues/532
    ytcfg = copy.deepcopy(ytcfg)
    client_context = traverse_obj(
        ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})
    client_context['hl'] = 'en'

    def iter_comment_items(sections):
        if not isinstance(sections, list):
            return
        for section in sections:
            for key, renderer in section.items():
                if key in renderer_keys:
                    identity_token = self._extract_identity_token(webpage, item_id=video_id)
                    account_syncid = self._extract_account_syncid(ytcfg)
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=identity_token, account_syncid=account_syncid)
                    # Only the first known renderer of a section is used
                    break

    collected, expected_total = [], 0
    try:
        for item in iter_comment_items(contents):
            if len(collected) >= limit:
                break
            if isinstance(item, int):
                # An int item carries the estimated total, not a comment
                expected_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        # Keep whatever was downloaded so far
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(collected), expected_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
2429
109dd3b2 2430 @staticmethod
2431 def _generate_player_context(sts=None):
2432 context = {
2433 'html5Preference': 'HTML5_PREF_WANTS',
2434 }
2435 if sts is not None:
2436 context['signatureTimestamp'] = sts
2437 return {
2438 'playbackContext': {
2439 'contentPlaybackContext': context
a1a7907b 2440 },
2fd226f6 2441 'contentCheckOk': True,
2442 'racyCheckOk': True
109dd3b2 2443 }
2444
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """
    Check whether a player response indicates an age-gated video

    @param player_response  Player response dict (may be None)
    @returns  True if 'playabilityStatus' has a 'desktopLegacyAgeGateReason',
              or if its 'status' or 'reason' contains one of the known
              age-gate markers (compared case-insensitively)
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lower-case while status codes are upper-case (this file
    # compares the status against 'UNPLAYABLE'), so the text must be
    # lower-cased or the status markers could never match.
    # Non-string values are ignored instead of being searched.
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)
2456
@staticmethod
def _is_unplayable(player_response):
    """
    Check whether the playability status of a player response is 'UNPLAYABLE'

    @param player_response  Player response dict
    @returns                True only if playabilityStatus.status equals 'UNPLAYABLE'
    """
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 2460
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
    """
    Request the player response of a video from the 'player' API endpoint

    @param client          Name of the client to make the request as
    @param video_id        Id of the video
    @param master_ytcfg    Fallback ytcfg for the session index, account
                           syncid and signature timestamp
    @param player_ytcfg    ytcfg used for the request and its headers
    @param identity_token  Identity token passed on to the API headers
    @param player_url      Player URL used for finding the signature timestamp
    @param initial_pr      Initial player response; only used as a source
                           for the account syncid
    @returns               The API response, or None if it is empty
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    # Not being able to find the signature timestamp is not an error
    sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    api_headers = self.generate_api_headers(
        player_ytcfg, identity_token, syncid,
        default_client=client, session_index=session_index)

    query = {'videoId': video_id, **self._generate_player_context(sts)}
    display_name = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=query,
        ytcfg=player_ytcfg, headers=api_headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % display_name)
    return response or None
2478
def _get_requested_clients(self, url, smuggled_data):
    """
    Determine which player clients to use for extraction

    The clients come from the 'player_client' extractor argument. The value
    'all' stands for every client whose name does not start with '_',
    ordered by descending 'priority'. Names that are not recognized are
    skipped with a warning. Without any usable value, 'android' and 'web'
    are used. For music URLs, the '<client>_music' variant of each chosen
    client is added when such a client exists.

    @param url            URL being extracted
    @param smuggled_data  Smuggled data dict; 'is_music_url' marks a music URL
    @returns              List of client names without duplicates
    """
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if not client.startswith('_')),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = ['android', 'web']

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Collect the music variants before extending, so that the list is
        # not modified while it is being iterated over
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 2499
c0bc527b
M
2500 def _extract_player_ytcfg(self, client, video_id):
2501 url = {
2502 'web_music': 'https://music.youtube.com',
2503 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2504 }.get(client)
2505 if not url:
2506 return {}
2507 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2508 return self.extract_ytcfg(video_id, webpage) or {}
2509
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
    """
    Generate the player responses of a video for the given clients

    If the webpage has an initial player response, a copy of it with
    'streamingData' set to None is yielded first. The clients are then
    tried in the given order; for age-gated responses, the matching
    '_agegate' (and possibly '_creator') client is tried as well.

    An ExtractorError from a client is only reported as a warning, unless
    it is the last error and nothing was yielded; then it is raised.

    @param clients         List of client names to try
    @param video_id        Id of the video
    @param webpage         Watch page, or a falsy value if unavailable
    @param master_ytcfg    ytcfg used when a client has no config of its own
    @param player_url      Player URL passed on to the player request
    @param identity_token  Identity token passed on to the player request
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    requested = clients
    # Reversed copy used as a stack: pop() then gives the clients in the
    # requested order, and a client queued during the loop is tried next
    pending = clients[::-1]

    def enqueue(name):
        if name not in INNERTUBE_CLIENTS or name in requested:
            return
        pending.append(name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    yielded_pr = False
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        yielded_pr = True
        yield stripped_pr

    last_error = None
    while pending:
        client = pending.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        try:
            if client == 'web' and initial_pr:
                # The webpage already has the response of the web client
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    identity_token, player_url, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; older ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            yielded_pr = True
            yield pr

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header():
            enqueue(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            enqueue(f'{client}_agegate')

    if last_error:
        if not yielded_pr:
            raise last_error
        self.report_warning(last_error)
11f9be09 2565
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """
    Generate the format dicts found in the given streaming data

    The formats listed under 'formats' and 'adaptiveFormats' are yielded
    first, followed by the formats of the HLS and DASH manifests of each
    streaming data entry. A manifest format whose itag was already seen is
    not yielded again.

    @param streaming_data  List of 'streamingData' dicts
    @param video_id        Id of the video
    @param player_url      Player URL needed for decrypting signatures;
                           formats that need decryption are skipped
                           when it is falsy
    @param is_live         Whether the video is live. DASH manifests of live
                           videos are only used if the 'include_live_dash'
                           extractor argument is given
    """
    itags, stream_ids = [], []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an 'audioQuality' that is present but null
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            # Remembered so that the quality of manifest formats can be guessed
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # Without a direct URL, the URL and its encrypted signature
            # have to be taken from the signature cipher
            sc = compat_parse_qs(fmt.get('signatureCipher') or '')
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': ', '.join(filter(None, (
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # quality is None when the format has neither 'quality'
                # nor 'audioQuality'
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', '')))),
            'fps': int_or_none(fmt.get('fps')),
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': (audio_track.get('id') or '').split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single stream formats, the total bitrate is that of the stream
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = (
        (not is_live or self._configuration_arg('include_live_dash'))
        and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    def guess_quality(f):
        # Use the quality seen for the same itag, or else the same height
        for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
            if val in qdict:
                return q(qdict[val])
        return -1

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag in itags:
                    continue
                if itag:
                    f['format_id'] = itag
                    itags.append(itag)
                f['quality'] = guess_quality(f)
                yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag:
                    itags.append(itag)
                f['quality'] = guess_quality(f)
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                yield f
2704
2705 def _real_extract(self, url):
2706 url, smuggled_data = unsmuggle_url(url, {})
2707 video_id = self._match_id(url)
2708
2709 base_url = self.http_scheme() + '//www.youtube.com/'
2710 webpage_url = base_url + 'watch?v=' + video_id
2711 webpage = self._download_webpage(
2712 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2713
2714 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2715 player_url = self._extract_player_url(master_ytcfg, webpage)
2716 identity_token = self._extract_identity_token(webpage, video_id)
2717
2718 player_responses = list(self._extract_player_responses(
2719 self._get_requested_clients(url, smuggled_data),
2720 video_id, webpage, master_ytcfg, player_url, identity_token))
2721
352d63fd 2722 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
11f9be09 2723
2724 playability_statuses = traverse_obj(
2725 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2726
2727 trailer_video_id = get_first(
2728 playability_statuses,
2729 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2730 expected_type=str)
2731 if trailer_video_id:
2732 return self.url_result(
2733 trailer_video_id, self.ie_key(), trailer_video_id)
2734
2735 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2736 if webpage else (lambda x: None))
2737
2738 video_details = traverse_obj(
2739 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2740 microformats = traverse_obj(
2741 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2742 expected_type=dict, default=[])
2743 video_title = (
2744 get_first(video_details, 'title')
2745 or self._get_text(microformats, (..., 'title'))
2746 or search_meta(['og:title', 'twitter:title', 'title']))
2747 video_description = get_first(video_details, 'shortDescription')
2748
2749 if not smuggled_data.get('force_singlefeed', False):
2750 if not self.get_param('noplaylist'):
2751 multifeed_metadata_list = get_first(
2752 player_responses,
2753 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2754 expected_type=str)
2755 if multifeed_metadata_list:
2756 entries = []
2757 feed_ids = []
2758 for feed in multifeed_metadata_list.split(','):
2759 # Unquote should take place before split on comma (,) since textual
2760 # fields may contain comma as well (see
2761 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2762 feed_data = compat_parse_qs(
2763 compat_urllib_parse_unquote_plus(feed))
2764
2765 def feed_entry(name):
2766 return try_get(
2767 feed_data, lambda x: x[name][0], compat_str)
2768
2769 feed_id = feed_entry('id')
2770 if not feed_id:
2771 continue
2772 feed_title = feed_entry('title')
2773 title = video_title
2774 if feed_title:
2775 title += ' (%s)' % feed_title
2776 entries.append({
2777 '_type': 'url_transparent',
2778 'ie_key': 'Youtube',
2779 'url': smuggle_url(
2780 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2781 {'force_singlefeed': True}),
2782 'title': title,
2783 })
2784 feed_ids.append(feed_id)
2785 self.to_screen(
2786 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2787 % (', '.join(feed_ids), video_id))
2788 return self.playlist_result(
2789 entries, video_id, video_title, video_description)
2790 else:
2791 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2792
7ea65411 2793 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
11f9be09 2794 is_live = get_first(video_details, 'isLive')
7ea65411 2795 if is_live is None:
2796 is_live = get_first(live_broadcast_details, 'isLiveNow')
11f9be09 2797
2798 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2799 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
bf1317d2 2800
545cc85d 2801 if not formats:
11f9be09 2802 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 2803 self.report_drm(video_id)
11f9be09 2804 pemr = get_first(
2805 playability_statuses,
2806 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2807 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2808 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 2809 if subreason:
545cc85d 2810 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 2811 countries = get_first(microformats, 'availableCountries')
545cc85d 2812 if not countries:
2813 regions_allowed = search_meta('regionsAllowed')
2814 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2815 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 2816 reason += f'. {subreason}'
545cc85d 2817 if reason:
b7da73eb 2818 self.raise_no_formats(reason, expected=True)
bf1317d2 2819
11f9be09 2820 for f in formats:
2a9c6dcd 2821 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
11f9be09 2822 f['source_preference'] = -10
3619f78d 2823 # TODO: this method is not reliable
2824 f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
11f9be09 2825
2a9c6dcd 2826 # Source is given priority since formats that throttle are given lower source_preference
2827 # When throttling issue is fully fixed, remove this
c311988d 2828 self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang'))
bf1317d2 2829
11f9be09 2830 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 2831 if not keywords and webpage:
2832 keywords = [
2833 unescapeHTML(m.group('content'))
2834 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2835 for keyword in keywords:
2836 if keyword.startswith('yt:stretch='):
201c1459 2837 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2838 if mobj:
2839 # NB: float is intentional for forcing float division
2840 w, h = (float(v) for v in mobj.groups())
2841 if w > 0 and h > 0:
2842 ratio = w / h
2843 for f in formats:
2844 if f.get('vcodec') != 'none':
2845 f['stretched_ratio'] = ratio
2846 break
6449cd80 2847
545cc85d 2848 thumbnails = []
11f9be09 2849 thumbnail_dicts = traverse_obj(
2850 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2851 expected_type=dict, default=[])
2852 for thumbnail in thumbnail_dicts:
2853 thumbnail_url = thumbnail.get('url')
2854 if not thumbnail_url:
2855 continue
2856 # Sometimes youtube gives a wrong thumbnail URL. See:
2857 # https://github.com/yt-dlp/yt-dlp/issues/233
2858 # https://github.com/ytdl-org/youtube-dl/issues/28023
2859 if 'maxresdefault' in thumbnail_url:
2860 thumbnail_url = thumbnail_url.split('?')[0]
2861 thumbnails.append({
2862 'url': thumbnail_url,
2863 'height': int_or_none(thumbnail.get('height')),
2864 'width': int_or_none(thumbnail.get('width')),
2865 })
ff2751ac 2866 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2867 if thumbnail_url:
2868 thumbnails.append({
2869 'url': thumbnail_url,
ff2751ac 2870 })
0ba692ac 2871 # The best resolution thumbnails sometimes does not appear in the webpage
2872 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 2873 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2874 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
245524e6 2875 # TODO: Test them also? - For some videos, even these don't exist
cca80fe6 2876 guaranteed_thumbnail_names = [
2877 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2878 'mqdefault', 'mq1', 'mq2', 'mq3',
2879 'default', '1', '2', '3'
2880 ]
2881 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2882 n_thumbnail_names = len(thumbnail_names)
2883
0ba692ac 2884 thumbnails.extend({
2885 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2886 video_id=video_id, name=name, ext=ext,
2887 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 2888 '_test_url': name in hq_thumbnail_names,
2889 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 2890 for thumb in thumbnails:
cca80fe6 2891 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 2892 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 2893 self._remove_duplicate_formats(thumbnails)
545cc85d 2894
7ea65411 2895 category = get_first(microformats, 'category') or search_meta('genre')
2896 channel_id = str_or_none(
2897 get_first(video_details, 'channelId')
2898 or get_first(microformats, 'externalChannelId')
2899 or search_meta('channelId'))
2900 duration = int_or_none(
2901 get_first(video_details, 'lengthSeconds')
2902 or get_first(microformats, 'lengthSeconds')
2903 or parse_duration(search_meta('duration'))) or None
2904 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2905
2906 live_content = get_first(video_details, 'isLiveContent')
2907 is_upcoming = get_first(video_details, 'isUpcoming')
2908 if is_live is None:
2909 if is_upcoming or live_content is False:
2910 is_live = False
2911 if is_upcoming is None and (live_content or is_live):
2912 is_upcoming = False
2913 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2914 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2915 if not duration and live_endtime and live_starttime:
2916 duration = live_endtime - live_starttime
2917
545cc85d 2918 info = {
2919 'id': video_id,
2920 'title': self._live_title(video_title) if is_live else video_title,
2921 'formats': formats,
2922 'thumbnails': thumbnails,
2923 'description': video_description,
2924 'upload_date': unified_strdate(
11f9be09 2925 get_first(microformats, 'uploadDate')
545cc85d 2926 or search_meta('uploadDate')),
11f9be09 2927 'uploader': get_first(video_details, 'author'),
545cc85d 2928 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2929 'uploader_url': owner_profile_url,
2930 'channel_id': channel_id,
11f9be09 2931 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
545cc85d 2932 'duration': duration,
2933 'view_count': int_or_none(
11f9be09 2934 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 2935 or search_meta('interactionCount')),
11f9be09 2936 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 2937 'age_limit': 18 if (
11f9be09 2938 get_first(microformats, 'isFamilySafe') is False
545cc85d 2939 or search_meta('isFamilyFriendly') == 'false'
2940 or search_meta('og:restrictions:age') == '18+') else 0,
2941 'webpage_url': webpage_url,
2942 'categories': [category] if category else None,
2943 'tags': keywords,
11f9be09 2944 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 2945 'is_live': is_live,
2946 'was_live': (False if is_live or is_upcoming or live_content is False
2947 else None if is_live is None or is_upcoming is None
2948 else live_content),
2949 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2950 'release_timestamp': live_starttime,
545cc85d 2951 }
b477fc13 2952
3944e7af 2953 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2954 # Converted into dicts to remove duplicates
2955 captions = {
2956 sub.get('baseUrl'): sub
2957 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2958 translation_languages = {
2959 lang.get('languageCode'): lang.get('languageName')
2960 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
545cc85d 2961 subtitles = {}
2962 if pctr:
774d79cc 2963 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2964 lang_subs = container.setdefault(lang_code, [])
545cc85d 2965 for fmt in self._SUBTITLE_FORMATS:
2966 query.update({
2967 'fmt': fmt,
2968 })
2969 lang_subs.append({
2970 'ext': fmt,
2971 'url': update_url_query(base_url, query),
774d79cc 2972 'name': sub_name,
545cc85d 2973 })
7e72694b 2974
3944e7af 2975 for base_url, caption_track in captions.items():
545cc85d 2976 if not base_url:
2977 continue
2978 if caption_track.get('kind') != 'asr':
120916da 2979 lang_code = (
2980 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2981 or caption_track.get('languageCode'))
545cc85d 2982 if not lang_code:
2983 continue
2984 process_language(
774d79cc 2985 subtitles, base_url, lang_code,
a7429aa9 2986 traverse_obj(caption_track, ('name', 'simpleText'), ('name', 'runs', ..., 'text'), get_all=False),
774d79cc 2987 {})
545cc85d 2988 continue
2989 automatic_captions = {}
3944e7af 2990 for trans_code, trans_name in translation_languages.items():
2991 if not trans_code:
545cc85d 2992 continue
2993 process_language(
3944e7af 2994 automatic_captions, base_url, trans_code,
2995 self._get_text(trans_name, max_runs=1),
2996 {'tlang': trans_code})
545cc85d 2997 info['automatic_captions'] = automatic_captions
2998 info['subtitles'] = subtitles
7e72694b 2999
545cc85d 3000 parsed_url = compat_urllib_parse_urlparse(url)
3001 for component in [parsed_url.fragment, parsed_url.query]:
3002 query = compat_parse_qs(component)
3003 for k, v in query.items():
3004 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3005 d_k += '_time'
3006 if d_k not in info and k in s_ks:
3007 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
3008
3009 # Youtube Music Auto-generated description
822b9d9c 3010 if video_description:
38d70284 3011 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 3012 if mobj:
822b9d9c
RA
3013 release_year = mobj.group('release_year')
3014 release_date = mobj.group('release_date')
3015 if release_date:
3016 release_date = release_date.replace('-', '')
3017 if not release_year:
545cc85d 3018 release_year = release_date[:4]
3019 info.update({
3020 'album': mobj.group('album'.strip()),
3021 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3022 'track': mobj.group('track').strip(),
3023 'release_date': release_date,
cc2db878 3024 'release_year': int_or_none(release_year),
545cc85d 3025 })
7e72694b 3026
545cc85d 3027 initial_data = None
3028 if webpage:
3029 initial_data = self._extract_yt_initial_variable(
3030 webpage, self._YT_INITIAL_DATA_RE, video_id,
3031 'yt initial data')
3032 if not initial_data:
11f9be09 3033 headers = self.generate_api_headers(
3034 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
3035 session_index=self._extract_session_index(master_ytcfg))
3036
109dd3b2 3037 initial_data = self._extract_response(
3038 item_id=video_id, ep='next', fatal=False,
11f9be09 3039 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
109dd3b2 3040 note='Downloading initial data API JSON')
545cc85d 3041
c60ee3a2 3042 try:
3043 # This will error if there is no livechat
3044 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3045 info['subtitles']['live_chat'] = [{
3046 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3047 'video_id': video_id,
3048 'ext': 'json',
f6745c49 3049 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 3050 }]
3051 except (KeyError, IndexError, TypeError):
3052 pass
545cc85d 3053
3054 if initial_data:
7c365c21 3055 info['chapters'] = (
3056 self._extract_chapters_from_json(initial_data, duration)
3057 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3058 or None)
545cc85d 3059
3060 contents = try_get(
3061 initial_data,
3062 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3063 list) or []
3064 for content in contents:
3065 vpir = content.get('videoPrimaryInfoRenderer')
3066 if vpir:
3067 stl = vpir.get('superTitleLink')
3068 if stl:
fe93e2c4 3069 stl = self._get_text(stl)
545cc85d 3070 if try_get(
3071 vpir,
3072 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3073 info['location'] = stl
3074 else:
3075 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3076 if mobj:
3077 info.update({
3078 'series': mobj.group(1),
3079 'season_number': int(mobj.group(2)),
3080 'episode_number': int(mobj.group(3)),
3081 })
3082 for tlb in (try_get(
3083 vpir,
3084 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3085 list) or []):
3086 tbr = tlb.get('toggleButtonRenderer') or {}
3087 for getter, regex in [(
3088 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3089 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3090 lambda x: x['accessibility'],
3091 lambda x: x['accessibilityData']['accessibilityData'],
3092 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3093 label = (try_get(tbr, getter, dict) or {}).get('label')
3094 if label:
3095 mobj = re.match(regex, label)
3096 if mobj:
3097 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3098 break
3099 sbr_tooltip = try_get(
3100 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3101 if sbr_tooltip:
3102 like_count, dislike_count = sbr_tooltip.split(' / ')
3103 info.update({
3104 'like_count': str_to_int(like_count),
3105 'dislike_count': str_to_int(dislike_count),
3106 })
3107 vsir = content.get('videoSecondaryInfoRenderer')
3108 if vsir:
052e1350 3109 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
545cc85d 3110 rows = try_get(
3111 vsir,
3112 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3113 list) or []
3114 multiple_songs = False
3115 for row in rows:
3116 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3117 multiple_songs = True
3118 break
3119 for row in rows:
3120 mrr = row.get('metadataRowRenderer') or {}
3121 mrr_title = mrr.get('title')
3122 if not mrr_title:
3123 continue
052e1350 3124 mrr_title = self._get_text(mrr, 'title')
3125 mrr_contents_text = self._get_text(mrr, ('contents', 0))
545cc85d 3126 if mrr_title == 'License':
3127 info['license'] = mrr_contents_text
3128 elif not multiple_songs:
3129 if mrr_title == 'Album':
3130 info['album'] = mrr_contents_text
3131 elif mrr_title == 'Artist':
3132 info['artist'] = mrr_contents_text
3133 elif mrr_title == 'Song':
3134 info['track'] = mrr_contents_text
3135
3136 fallbacks = {
3137 'channel': 'uploader',
3138 'channel_id': 'uploader_id',
3139 'channel_url': 'uploader_url',
3140 }
3141 for to, frm in fallbacks.items():
3142 if not info.get(to):
3143 info[to] = info.get(frm)
3144
3145 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3146 v = info.get(s_k)
3147 if v:
3148 info[d_k] = v
b84071c0 3149
11f9be09 3150 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3151 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3152 is_membersonly = None
b28f8d24 3153 is_premium = None
c224251a
M
3154 if initial_data and is_private is not None:
3155 is_membersonly = False
b28f8d24 3156 is_premium = False
47193e02 3157 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3158 badge_labels = set()
3159 for content in contents:
3160 if not isinstance(content, dict):
3161 continue
3162 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3163 for badge_label in badge_labels:
3164 if badge_label.lower() == 'members only':
3165 is_membersonly = True
3166 elif badge_label.lower() == 'premium':
3167 is_premium = True
3168 elif badge_label.lower() == 'unlisted':
3169 is_unlisted = True
c224251a 3170
c224251a
M
3171 info['availability'] = self._availability(
3172 is_private=is_private,
b28f8d24 3173 needs_premium=is_premium,
c224251a
M
3174 needs_subscription=is_membersonly,
3175 needs_auth=info['age_limit'] >= 18,
3176 is_unlisted=None if is_private is None else is_unlisted)
3177
0bb1bc1b 3178 if self.get_param('getcomments', False):
11f9be09 3179 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3180
11f9be09 3181 self.mark_watched(video_id, player_responses)
d77ab8e2 3182
545cc85d 3183 return info
c5e8d7af 3184
5f6a1245 3185
8bdd16b4 3186class YoutubeTabIE(YoutubeBaseInfoExtractor):
3187 IE_DESC = 'YouTube.com tab'
70d5c17b 3188 _VALID_URL = r'''(?x)
3189 https?://
3190 (?:\w+\.)?
3191 (?:
3192 youtube(?:kids)?\.com|
3193 invidio\.us
3194 )/
3195 (?:
fe03a6cd 3196 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3197 (?P<not_channel>
9ba5705a 3198 feed/|hashtag/|
70d5c17b 3199 (?:playlist|watch)\?.*?\blist=
3200 )|
29f7c58a 3201 (?!(?:%s)\b) # Direct URLs
70d5c17b 3202 )
3203 (?P<id>[^/?\#&]+)
3204 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3205 IE_NAME = 'youtube:tab'
3206
81127aa5 3207 _TESTS = [{
da692b79 3208 'note': 'playlists, multipage',
8bdd16b4 3209 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3210 'playlist_mincount': 94,
3211 'info_dict': {
3212 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3213 'title': 'Игорь Клейнер - Playlists',
3214 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3215 'uploader': 'Игорь Клейнер',
3216 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3217 },
3218 }, {
da692b79 3219 'note': 'playlists, multipage, different order',
8bdd16b4 3220 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3221 'playlist_mincount': 94,
3222 'info_dict': {
3223 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3224 'title': 'Игорь Клейнер - Playlists',
3225 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3226 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3227 'uploader': 'Игорь Клейнер',
8bdd16b4 3228 },
201c1459 3229 }, {
da692b79 3230 'note': 'playlists, series',
201c1459 3231 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3232 'playlist_mincount': 5,
3233 'info_dict': {
3234 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3235 'title': '3Blue1Brown - Playlists',
3236 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3237 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3238 'uploader': '3Blue1Brown',
201c1459 3239 },
8bdd16b4 3240 }, {
da692b79 3241 'note': 'playlists, singlepage',
8bdd16b4 3242 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3243 'playlist_mincount': 4,
3244 'info_dict': {
3245 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3246 'title': 'ThirstForScience - Playlists',
3247 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3248 'uploader': 'ThirstForScience',
3249 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3250 }
3251 }, {
3252 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3253 'only_matching': True,
3254 }, {
da692b79 3255 'note': 'basic, single video playlist',
0e30a7b9 3256 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3257 'info_dict': {
0e30a7b9 3258 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3259 'uploader': 'Sergey M.',
3260 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3261 'title': 'youtube-dl public playlist',
81127aa5 3262 },
0e30a7b9 3263 'playlist_count': 1,
9291475f 3264 }, {
da692b79 3265 'note': 'empty playlist',
0e30a7b9 3266 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3267 'info_dict': {
0e30a7b9 3268 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3269 'uploader': 'Sergey M.',
3270 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3271 'title': 'youtube-dl empty playlist',
9291475f
PH
3272 },
3273 'playlist_count': 0,
3274 }, {
da692b79 3275 'note': 'Home tab',
8bdd16b4 3276 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3277 'info_dict': {
8bdd16b4 3278 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3279 'title': 'lex will - Home',
3280 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3281 'uploader': 'lex will',
3282 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3283 },
8bdd16b4 3284 'playlist_mincount': 2,
9291475f 3285 }, {
da692b79 3286 'note': 'Videos tab',
8bdd16b4 3287 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3288 'info_dict': {
8bdd16b4 3289 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3290 'title': 'lex will - Videos',
3291 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3292 'uploader': 'lex will',
3293 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3294 },
8bdd16b4 3295 'playlist_mincount': 975,
9291475f 3296 }, {
da692b79 3297 'note': 'Videos tab, sorted by popular',
8bdd16b4 3298 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3299 'info_dict': {
8bdd16b4 3300 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3301 'title': 'lex will - Videos',
3302 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3303 'uploader': 'lex will',
3304 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3305 },
8bdd16b4 3306 'playlist_mincount': 199,
9291475f 3307 }, {
da692b79 3308 'note': 'Playlists tab',
8bdd16b4 3309 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3310 'info_dict': {
8bdd16b4 3311 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3312 'title': 'lex will - Playlists',
3313 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3314 'uploader': 'lex will',
3315 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3316 },
8bdd16b4 3317 'playlist_mincount': 17,
ac7553d0 3318 }, {
da692b79 3319 'note': 'Community tab',
8bdd16b4 3320 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3321 'info_dict': {
8bdd16b4 3322 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3323 'title': 'lex will - Community',
3324 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3325 'uploader': 'lex will',
3326 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3327 },
3328 'playlist_mincount': 18,
87dadd45 3329 }, {
da692b79 3330 'note': 'Channels tab',
8bdd16b4 3331 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3332 'info_dict': {
8bdd16b4 3333 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3334 'title': 'lex will - Channels',
3335 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3336 'uploader': 'lex will',
3337 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3338 },
deaec5af 3339 'playlist_mincount': 12,
cd684175 3340 }, {
3341 'note': 'Search tab',
3342 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3343 'playlist_mincount': 40,
3344 'info_dict': {
3345 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3346 'title': '3Blue1Brown - Search - linear algebra',
3347 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3348 'uploader': '3Blue1Brown',
3349 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3350 },
6b08cdf6 3351 }, {
a0566bbf 3352 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3353 'only_matching': True,
3354 }, {
a0566bbf 3355 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3356 'only_matching': True,
3357 }, {
a0566bbf 3358 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3359 'only_matching': True,
3360 }, {
3361 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3362 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3363 'info_dict': {
3364 'title': '29C3: Not my department',
3365 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3366 'uploader': 'Christiaan008',
3367 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3368 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3369 },
3370 'playlist_count': 96,
3371 }, {
3372 'note': 'Large playlist',
3373 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3374 'info_dict': {
8bdd16b4 3375 'title': 'Uploads from Cauchemar',
3376 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3377 'uploader': 'Cauchemar',
3378 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3379 },
8bdd16b4 3380 'playlist_mincount': 1123,
3381 }, {
da692b79 3382 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3383 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3384 'only_matching': True,
4b7df0d3
JMF
3385 }, {
3386 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3387 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3388 'info_dict': {
acf757f4
PH
3389 'title': 'Uploads from Interstellar Movie',
3390 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3391 'uploader': 'Interstellar Movie',
8bdd16b4 3392 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3393 },
481cc733 3394 'playlist_mincount': 21,
358de58c 3395 }, {
3396 'note': 'Playlist with "show unavailable videos" button',
3397 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3398 'info_dict': {
3399 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3400 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3401 'uploader': 'Phim Siêu Nhân Nhật Bản',
3402 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3403 },
da692b79 3404 'playlist_mincount': 200,
5d342002 3405 }, {
da692b79 3406 'note': 'Playlist with unavailable videos in page 7',
5d342002 3407 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3408 'info_dict': {
3409 'title': 'Uploads from BlankTV',
3410 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3411 'uploader': 'BlankTV',
3412 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3413 },
da692b79 3414 'playlist_mincount': 1000,
8bdd16b4 3415 }, {
da692b79 3416 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3417 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3418 'info_dict': {
3419 'title': 'Data Analysis with Dr Mike Pound',
3420 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3421 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3422 'uploader': 'Computerphile',
deaec5af 3423 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3424 },
3425 'playlist_mincount': 11,
3426 }, {
a0566bbf 3427 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3428 'only_matching': True,
dacb3a86 3429 }, {
da692b79 3430 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3431 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3432 'info_dict': {
3433 'id': 'FqZTN594JQw',
3434 'ext': 'webm',
3435 'title': "Smiley's People 01 detective, Adventure Series, Action",
3436 'uploader': 'STREEM',
3437 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3438 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3439 'upload_date': '20150526',
3440 'license': 'Standard YouTube License',
3441 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3442 'categories': ['People & Blogs'],
3443 'tags': list,
dbdaaa23 3444 'view_count': int,
dacb3a86
S
3445 'like_count': int,
3446 'dislike_count': int,
3447 },
3448 'params': {
3449 'skip_download': True,
3450 },
13a75688 3451 'skip': 'This video is not available.',
dacb3a86 3452 'add_ie': [YoutubeIE.ie_key()],
481cc733 3453 }, {
8bdd16b4 3454 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3455 'only_matching': True,
66b48727 3456 }, {
8bdd16b4 3457 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3458 'only_matching': True,
a0566bbf 3459 }, {
3460 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3461 'info_dict': {
57015a4a 3462 'id': '3yImotZU3tw', # This will keep changing
a0566bbf 3463 'ext': 'mp4',
deaec5af 3464 'title': compat_str,
a0566bbf 3465 'uploader': 'Sky News',
3466 'uploader_id': 'skynews',
3467 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3468 'upload_date': r're:\d{8}',
3469 'description': compat_str,
a0566bbf 3470 'categories': ['News & Politics'],
3471 'tags': list,
3472 'like_count': int,
3473 'dislike_count': int,
3474 },
3475 'params': {
3476 'skip_download': True,
3477 },
da692b79 3478 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3479 }, {
3480 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3481 'info_dict': {
3482 'id': 'a48o2S1cPoo',
3483 'ext': 'mp4',
3484 'title': 'The Young Turks - Live Main Show',
3485 'uploader': 'The Young Turks',
3486 'uploader_id': 'TheYoungTurks',
3487 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3488 'upload_date': '20150715',
3489 'license': 'Standard YouTube License',
3490 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3491 'categories': ['News & Politics'],
3492 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3493 'like_count': int,
3494 'dislike_count': int,
3495 },
3496 'params': {
3497 'skip_download': True,
3498 },
3499 'only_matching': True,
3500 }, {
3501 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3502 'only_matching': True,
3503 }, {
3504 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3505 'only_matching': True,
09f1580e 3506 }, {
3507 'note': 'A channel that is not live. Should raise error',
3508 'url': 'https://www.youtube.com/user/numberphile/live',
3509 'only_matching': True,
3d3dddc9 3510 }, {
3511 'url': 'https://www.youtube.com/feed/trending',
3512 'only_matching': True,
3513 }, {
3d3dddc9 3514 'url': 'https://www.youtube.com/feed/library',
3515 'only_matching': True,
3516 }, {
3d3dddc9 3517 'url': 'https://www.youtube.com/feed/history',
3518 'only_matching': True,
3519 }, {
3d3dddc9 3520 'url': 'https://www.youtube.com/feed/subscriptions',
3521 'only_matching': True,
3522 }, {
3d3dddc9 3523 'url': 'https://www.youtube.com/feed/watch_later',
3524 'only_matching': True,
3525 }, {
da692b79 3526 'note': 'Recommended - redirects to home page',
3d3dddc9 3527 'url': 'https://www.youtube.com/feed/recommended',
3528 'only_matching': True,
29f7c58a 3529 }, {
da692b79 3530 'note': 'inline playlist with not always working continuations',
29f7c58a 3531 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3532 'only_matching': True,
3533 }, {
3534 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3535 'only_matching': True,
3536 }, {
3537 'url': 'https://www.youtube.com/course',
3538 'only_matching': True,
3539 }, {
3540 'url': 'https://www.youtube.com/zsecurity',
3541 'only_matching': True,
3542 }, {
3543 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3544 'only_matching': True,
3545 }, {
3546 'url': 'https://www.youtube.com/TheYoungTurks/live',
3547 'only_matching': True,
39ed931e 3548 }, {
3549 'url': 'https://www.youtube.com/hashtag/cctv9',
3550 'info_dict': {
3551 'id': 'cctv9',
3552 'title': '#cctv9',
3553 },
3554 'playlist_mincount': 350,
201c1459 3555 }, {
3556 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3557 'only_matching': True,
9297939e 3558 }, {
da692b79 3559 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3560 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3561 'only_matching': True
fe03a6cd 3562 }, {
3563 'note': '/browse/ should redirect to /channel/',
3564 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3565 'only_matching': True
3566 }, {
3567 'note': 'VLPL, should redirect to playlist?list=PL...',
3568 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3569 'info_dict': {
3570 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3571 'uploader': 'NoCopyrightSounds',
3572 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3573 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3574 'title': 'NCS Releases',
3575 },
3576 'playlist_mincount': 166,
18db7548 3577 }, {
3578 'note': 'Topic, should redirect to playlist?list=UU...',
3579 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3580 'info_dict': {
3581 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3582 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3583 'title': 'Uploads from Royalty Free Music - Topic',
3584 'uploader': 'Royalty Free Music - Topic',
3585 },
3586 'expected_warnings': [
3587 'A channel/user page was given',
3588 'The URL does not have a videos tab',
3589 ],
3590 'playlist_mincount': 101,
3591 }, {
3592 'note': 'Topic without a UU playlist',
3593 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3594 'info_dict': {
3595 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3596 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3597 },
3598 'expected_warnings': [
3599 'A channel/user page was given',
3600 'The URL does not have a videos tab',
3601 'Falling back to channel URL',
3602 ],
3603 'playlist_mincount': 9,
abcdd12b 3604 }, {
3605 'note': 'Youtube music Album',
3606 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3607 'info_dict': {
3608 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3609 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3610 },
3611 'playlist_count': 50,
47193e02 3612 }, {
3613 'note': 'unlisted single video playlist',
3614 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3615 'info_dict': {
3616 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3617 'uploader': 'colethedj',
3618 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3619 'title': 'yt-dlp unlisted playlist test',
3620 'availability': 'unlisted'
3621 },
3622 'playlist_count': 1,
29f7c58a 3623 }]
3624
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    Any URL that YoutubeIE reports as suitable is rejected here; for every
    other URL the decision is delegated to the parent class.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3629
def _extract_channel_id(self, webpage):
    """Return the channel ID found in ``webpage``.

    The ``channelId`` meta tag is tried first.  When it is absent, the
    channel URL is read from the first available URL-carrying meta tag and
    the ID is taken from its ``/channel/<id>`` path component.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to live inside the capturing group: a repeated
    # group such as ``(x)+`` only retains its last repetition, which would
    # yield just the final character of the ID.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 3642
8bdd16b4 3643 @staticmethod
cd7c66cf 3644 def _extract_basic_item_renderer(item):
3645 # Modified from _extract_grid_item_renderer
201c1459 3646 known_basic_renderers = (
3647 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3648 )
3649 for key, renderer in item.items():
201c1459 3650 if not isinstance(renderer, dict):
cd7c66cf 3651 continue
201c1459 3652 elif key in known_basic_renderers:
3653 return renderer
3654 elif key.startswith('grid') and key.endswith('Renderer'):
3655 return renderer
8bdd16b4 3656
8bdd16b4 3657 def _grid_entries(self, grid_renderer):
3658 for item in grid_renderer['items']:
3659 if not isinstance(item, dict):
39b62db1 3660 continue
cd7c66cf 3661 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3662 if not isinstance(renderer, dict):
3663 continue
052e1350 3664 title = self._get_text(renderer, 'title')
fe93e2c4 3665
8bdd16b4 3666 # playlist
3667 playlist_id = renderer.get('playlistId')
3668 if playlist_id:
3669 yield self.url_result(
3670 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3671 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3672 video_title=title)
201c1459 3673 continue
8bdd16b4 3674 # video
3675 video_id = renderer.get('videoId')
3676 if video_id:
3677 yield self._extract_video(renderer)
201c1459 3678 continue
8bdd16b4 3679 # channel
3680 channel_id = renderer.get('channelId')
3681 if channel_id:
8bdd16b4 3682 yield self.url_result(
3683 'https://www.youtube.com/channel/%s' % channel_id,
3684 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3685 continue
3686 # generic endpoint URL support
3687 ep_url = urljoin('https://www.youtube.com/', try_get(
3688 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3689 compat_str))
3690 if ep_url:
3691 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3692 if ie.suitable(ep_url):
3693 yield self.url_result(
3694 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3695 break
8bdd16b4 3696
3d3dddc9 3697 def _shelf_entries_from_content(self, shelf_renderer):
3698 content = shelf_renderer.get('content')
3699 if not isinstance(content, dict):
8bdd16b4 3700 return
cd7c66cf 3701 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3702 if renderer:
3703 # TODO: add support for nested playlists so each shelf is processed
3704 # as separate playlist
3705 # TODO: this includes only first N items
3706 for entry in self._grid_entries(renderer):
3707 yield entry
3708 renderer = content.get('horizontalListRenderer')
3709 if renderer:
3710 # TODO
3711 pass
8bdd16b4 3712
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf endpoint, then entries from its content.

    When ``skip_channels`` is true and the shelf URL contains
    ``/channels?``, nothing at all is yielded for this shelf.
    """
    endpoint = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 3729
8bdd16b4 3730 def _playlist_entries(self, video_list_renderer):
3731 for content in video_list_renderer['contents']:
3732 if not isinstance(content, dict):
3733 continue
3734 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3735 if not isinstance(renderer, dict):
3736 continue
3737 video_id = renderer.get('videoId')
3738 if not video_id:
3739 continue
3740 yield self._extract_video(renderer)
07aeced6 3741
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``rich_grid_renderer['content']['videoRenderer']``.

    Nothing is yielded when that renderer is missing, is not a dict, or has
    no ``videoId``.
    """
    renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)
3749
8bdd16b4 3750 def _video_entry(self, video_renderer):
3751 video_id = video_renderer.get('videoId')
3752 if video_id:
3753 return self._extract_video(video_renderer)
dacb3a86 3754
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a single backstage (community) post.

    In order: the attached video (if any), the attached playlist (if any),
    and every YouTube video linked inline in the post text, except a link
    pointing at the attached video itself.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    attached_video = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video was already yielded above; do not repeat it
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 3790
8bdd16b4 3791 def _post_thread_continuation_entries(self, post_thread_continuation):
3792 contents = post_thread_continuation.get('contents')
3793 if not isinstance(contents, list):
3794 return
3795 for content in contents:
3796 renderer = content.get('backstagePostThreadRenderer')
3797 if not isinstance(renderer, dict):
3798 continue
3799 for entry in self._post_thread_entries(renderer):
3800 yield entry
07aeced6 3801
39ed931e 3802 r''' # unused
3803 def _rich_grid_entries(self, contents):
3804 for content in contents:
3805 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3806 if video_renderer:
3807 entry = self._video_entry(video_renderer)
3808 if entry:
3809 yield entry
3810 '''
f4f751af 3811 def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
3462ffa8 3812
70d5c17b 3813 def extract_entries(parent_renderer): # this needs to called again for continuation to work with feeds
3814 contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3815 for content in contents:
3816 if not isinstance(content, dict):
8bdd16b4 3817 continue
70d5c17b 3818 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3462ffa8 3819 if not is_renderer:
70d5c17b 3820 renderer = content.get('richItemRenderer')
3462ffa8 3821 if renderer:
3822 for entry in self._rich_entries(renderer):
3823 yield entry
3824 continuation_list[0] = self._extract_continuation(parent_renderer)
8bdd16b4 3825 continue
3462ffa8 3826 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3827 for isr_content in isr_contents:
3828 if not isinstance(isr_content, dict):
3829 continue
69184e41 3830
3831 known_renderers = {
3832 'playlistVideoListRenderer': self._playlist_entries,
3833 'gridRenderer': self._grid_entries,
3834 'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
3835 'backstagePostThreadRenderer': self._post_thread_entries,
3836 'videoRenderer': lambda x: [self._video_entry(x)],
3837 }
3838 for key, renderer in isr_content.items():
3839 if key not in known_renderers:
3840 continue
3841 for entry in known_renderers[key](renderer):
3842 if entry:
3843 yield entry
3462ffa8 3844 continuation_list[0] = self._extract_continuation(renderer)
69184e41 3845 break
70d5c17b 3846
3462ffa8 3847 if not continuation_list[0]:
3848 continuation_list[0] = self._extract_continuation(is_renderer)
70d5c17b 3849
3850 if not continuation_list[0]:
3851 continuation_list[0] = self._extract_continuation(parent_renderer)
3462ffa8 3852
3853 continuation_list = [None] # Python 2 doesnot support nonlocal
29f7c58a 3854 tab_content = try_get(tab, lambda x: x['content'], dict)
3855 if not tab_content:
3856 return
3462ffa8 3857 parent_renderer = (
29f7c58a 3858 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3859 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
70d5c17b 3860 for entry in extract_entries(parent_renderer):
3861 yield entry
3462ffa8 3862 continuation = continuation_list[0]
fe93e2c4 3863 visitor_data = None
d069eca7 3864
8bdd16b4 3865 for page_num in itertools.count(1):
3866 if not continuation:
3867 break
11f9be09 3868 headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
79360d99 3869 response = self._extract_response(
3870 item_id='%s page %s' % (item_id, page_num),
fe93e2c4 3871 query=continuation, headers=headers, ytcfg=ytcfg,
79360d99 3872 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
a5c56234
M
3873
3874 if not response:
8bdd16b4 3875 break
f4f751af 3876 visitor_data = try_get(
3877 response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data
ebf1b291 3878
69184e41 3879 known_continuation_renderers = {
3880 'playlistVideoListContinuation': self._playlist_entries,
3881 'gridContinuation': self._grid_entries,
3882 'itemSectionContinuation': self._post_thread_continuation_entries,
3883 'sectionListContinuation': extract_entries, # for feeds
3884 }
8bdd16b4 3885 continuation_contents = try_get(
69184e41 3886 response, lambda x: x['continuationContents'], dict) or {}
3887 continuation_renderer = None
3888 for key, value in continuation_contents.items():
3889 if key not in known_continuation_renderers:
3462ffa8 3890 continue
69184e41 3891 continuation_renderer = value
3892 continuation_list = [None]
3893 for entry in known_continuation_renderers[key](continuation_renderer):
3894 yield entry
3895 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3896 break
3897 if continuation_renderer:
3898 continue
c5e8d7af 3899
a1b535bd 3900 known_renderers = {
3901 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3902 'gridVideoRenderer': (self._grid_entries, 'items'),
d61fc646 3903 'gridChannelRenderer': (self._grid_entries, 'items'),
a1b535bd 3904 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
cd7c66cf 3905 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
9ba5705a 3906 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
26fe8ffe 3907 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
a1b535bd 3908 }
cce889b9 3909 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
8bdd16b4 3910 continuation_items = try_get(
cce889b9 3911 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
a1b535bd 3912 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3913 video_items_renderer = None
3914 for key, value in continuation_item.items():
3915 if key not in known_renderers:
8bdd16b4 3916 continue
a1b535bd 3917 video_items_renderer = {known_renderers[key][1]: continuation_items}
9ba5705a 3918 continuation_list = [None]
a1b535bd 3919 for entry in known_renderers[key][0](video_items_renderer):
3920 yield entry
9ba5705a 3921 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
a1b535bd 3922 break
3923 if video_items_renderer:
3924 continue
8bdd16b4 3925 break
9558dcec 3926
@staticmethod
def _extract_selected_tab(tabs):
    """
    Return the renderer of the tab that is marked as selected.

    Each tab is looked up under 'tabRenderer' or 'expandableTabRenderer'.
    Raises ExtractorError when no tab has 'selected' set to True.
    """
    for candidate in tabs:
        tab_renderer = dict_get(candidate, ('tabRenderer', 'expandableTabRenderer'))
        if not tab_renderer:
            continue
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3935
@classmethod
def _extract_uploader(cls, data):
    """
    Collect uploader fields from the secondary sidebar info renderer.

    Returns a dict that may contain 'uploader', 'uploader_id' and
    'uploader_url'; keys whose value is None are left out. An empty dict
    is returned when no video owner entry is found.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_id = try_get(
        owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
    owner_path = try_get(
        owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
    fields = (
        ('uploader', owner.get('text')),
        ('uploader_id', owner_id),
        ('uploader_url', urljoin('https://www.youtube.com/', owner_path)),
    )
    # Only report the fields that could actually be determined
    return {name: value for name, value in fields if value is not None}
8bdd16b4 3950
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """
    Build the playlist result for a tab-based page.

    @param item_id: id matched from the URL; used as playlist id when the
                    metadata does not provide a channel id
    @param webpage: downloaded webpage; ytcfg and the identity token are
                    extracted from it
    @param data:    initial data of the page
    @param tabs:    list of tabs; the selected one supplies the entries
    @returns        self.playlist_result() over the selected tab's entries
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    # Two distinct lists, so the defaults are never aliased to each other
    thumbnails_list, tags = [], []

    selected_tab = self._extract_selected_tab(tabs)
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        # Not a channel page; fall back to the playlist metadata
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        # `or ''` also covers a 'keywords' key that is present but null,
        # which `.get('keywords', '')` would pass through as None
        tags = (renderer.get('keywords') or '').split()
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for t in thumbnails_list:
        if not isinstance(t, dict):
            continue
        thumbnail_url = url_or_none(t.get('url'))
        if not thumbnail_url:
            continue
        thumbnails.append({
            'url': thumbnail_url,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        })
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')
    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        # No channel metadata: take the uploader from the sidebar instead
        metadata.update(self._extract_uploader(data))
    # Mirror the (possibly updated) uploader fields as channel fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    ytcfg = self.extract_ytcfg(item_id, webpage)
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id,
            self._extract_identity_token(webpage, item_id),
            self._extract_account_syncid(ytcfg, data), ytcfg),
        **metadata)
73c4ac2c 4025
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """
    Yield the videos of a mix playlist, requesting further pages on demand.

    @param playlist:    playlist renderer of the first page
    @param playlist_id: id of the mix; sent with every follow-up request
    @param data:        initial data, used to extract the account syncid
    @param webpage:     webpage, used to extract ytcfg and identity token

    Iteration stops when a page has no videos, when a page adds nothing
    after the last yielded video, or when the first video shows up again.
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last panel entry may lack a watch endpoint; fall back to an
        # empty dict so the defaults below apply instead of crashing on None
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 4061
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """
    Handle a playlist found in a watch page.

    When the playlist advertises its own URL and that URL differs from
    `url`, the extraction is delegated to YoutubeTabIE through a url
    result. Otherwise the playlist is extracted here as a mix.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    if not playlist_url or playlist_url == url:
        mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
        return self.playlist_result(
            mix_entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4079
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its badges.
    Note: public is never assumed; it has to be stated by YouTube
    @param data: response
    """
    primary_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(primary_info)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        primary_info,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for entry in dropdown_entries:
        selected = try_get(
            entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if selected:
            selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
            if selected_label:
                labels.add(selected_label.lower())
                break

    is_private = None
    is_unlisted = None
    for label in labels:
        if label == 'public':
            is_unlisted = is_private = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4111
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` entry of the playlist sidebar.

    @param data:          response containing the sidebar
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the found value must have
    @returns              the renderer, or None if no item provides one
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next((found for found in candidates if found), None)
4120
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again with unavailable videos included.

    The browse id and params are taken from the 'show unavailable videos'
    menu item when it is present; otherwise defaults are used. Returns
    None when the page has no primary sidebar info renderer.
    """
    primary_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not primary_info:
        return

    browse_endpoint = {}
    menu_items = try_get(
        primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        button_text = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if button_text and button_text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    ytcfg = self.extract_ytcfg(item_id, webpage)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=visitor_data)
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4159
def _extract_webpage(self, url, item_id):
    """
    Download the webpage and its initial data, retrying on incomplete data.

    The download is repeated up to 'extractor_retries' times until the
    data has 'contents' or 'currentVideoEndpoint'. Alerts are reported
    after every failed attempt; ExtractorError is raised after the last.
    @returns (webpage, data)
    """
    max_retries = self.get_param('extractor_retries', 3)
    last_error = 'Incomplete yt initial data recieved'
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if attempt:
            self.report_warning('%s. Retrying ...' % last_error)
        note = 'Downloading webpage%s' % (' (retry #%d)' % attempt if attempt else '')
        webpage = self._download_webpage(url, item_id, note)
        data = self.extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if attempt >= max_retries:
            raise ExtractorError(last_error)
    return webpage, data
4181
@staticmethod
def _smuggle_data(entries, data):
    """
    Lazily yield `entries`, smuggling `data` into each entry's 'url'.

    Entries are modified in place; when `data` is empty they are passed
    through untouched.
    """
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
4188
def _real_extract(self, url):
    """
    Entry point: unsmuggle the URL, extract, then re-smuggle the data
    into the URL of every resulting entry.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        # Remembered so that it is also passed on to the entries
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4197
# _VALID_URL extended with groups that split a URL into 'pre' (the part
# matched by _VALID_URL), an optional 'tab' (only attempted when the
# 'channel_type' group matched) and 'post' (everything that follows)
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4199
def __real_extract(self, url, smuggled_data):
    """
    Normalize the URL, download the page and dispatch on what it contains.

    @param url:           URL to extract; its host is replaced by www.youtube.com
    @param smuggled_data: dict of smuggled data; only 'is_music_url' is read here
    @returns              a tab-based playlist result, a mix/playlist result,
                          or a url result pointing to a single video
    Raises ExtractorError when the page is none of the above.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match groups of the URL as a dict, with unmatched groups as ''
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from its (possibly rewritten) parts and re-match it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            # The requested /videos tab is not the tab that got selected
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Only switch to the playlist once it loaded without error alerts
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data, only_once=True)
    # `data` may have been replaced above, so the tabs are looked up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Neither tabs nor playlist: fall back to a single video, if there is one
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4314
c5e8d7af 4315
class YoutubePlaylistIE(InfoExtractor):
    """Matches playlist ids and playlist URLs and hands them over to YoutubeTabIE."""
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a video id ('v')."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        requested_video = parse_qs(url).get('v', [None])[0]
        if requested_video:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the URL to the canonical /playlist form and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query; a bare playlist id has none, so build one
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4400
4401
class YoutubeYtBeIE(InfoExtractor):
    """Handles youtu.be short links that carry both a video id and a playlist id."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL and delegate to YoutubeTabIE."""
        match = self._match_valid_url(url)
        playlist_id = match.group('playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': match.group('id'),
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4440
4441
class YoutubeYtUserIE(InfoExtractor):
    """Resolves the "ytuser:<name>" keyword to the user's page, handled by YoutubeTabIE."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4455
b05654f0 4456
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolves the ":ytfav" shortcut to the 'LL' playlist, handled by YoutubeTabIE."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
4474
4475
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor for the "ytsearch" keyword, paging through the search endpoint."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to `n` video results for `query`, following continuations."""
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        found = 0
        continuation = {}
        for page_num in itertools.count(1):
            request.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                return
            # First page vs. continuation pages keep the sections in different places
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [section]})

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or []:
                    if not isinstance(item, dict):
                        continue
                    video = item.get('videoRenderer')
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    found += 1
                    if found == n:
                        return

            if not continuation:
                return

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
75dff0ee 4543
c9ae7b95 4544
# Variant of YoutubeSearchIE for the "ytsearchdate" keyword; it only
# overrides the search key, description and the request's search params
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Sent as 'params' with the search request (see YoutubeSearchIE._entries)
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4550
c9ae7b95 4551
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Runs a search taken from a /results URL instead of a search keyword."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Use the class's own _VALID_URL as the URL pattern."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query and the 'sp' search params from the URL and run the search."""
        url_params = parse_qs(url)
        terms = url_params.get('search_query') or url_params.get('q')
        search_params = url_params.get('sp', ('',))
        self._SEARCH_PARAMS = search_params[0]
        return self._get_n_results(terms[0], self._MAX_RESULTS)
3462ffa8 4578
4579
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    Each subclass has to provide the _FEED_NAME property.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # The feed page itself is extracted by YoutubeTabIE
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4596
4597
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolves the ":ytwatchlater" shortcut to the 'WL' playlist, handled by YoutubeTabIE."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4610
4611
25f14e9f
S
# Feed extractor for the 'recommended' feed; unlike its base class it
# sets _LOGIN_REQUIRED to False
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Matches the bare youtube.com home page as well as the ":ytrec" shortcut
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4627
1ed5b5c9 4628
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """
    Feed extractor for the ``subscriptions`` feed.

    The pattern accepts ``:ytsub`` with an optional ``scription`` part and an
    optional trailing ``s`` (e.g. ``:ytsubs``, ``:ytsubscriptions``).
    """
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]
1ed5b5c9 4640
1ed5b5c9 4641
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed, matched by ``:ythis`` or ``:ythistory``."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]
1ed5b5c9
JMF
4650
4651
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catch-all for YouTube URLs that stop before any video ID.

    The pattern matches ``watch?`` followed by at most one of a few known
    non-ID parameters (or nothing), and ``attribution_link?a=...`` with no
    further parameters. Extraction always fails with an expected error that
    tells the user to quote the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """
        Always raise an expected ExtractorError explaining the likely cause.

        The suggested command names ``yt-dlp``, the program this extractor
        belongs to, rather than ``youtube-dl``.
        """
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4699
4700
class YoutubeTruncatedIDIE(InfoExtractor):
    """
    Catch watch URLs whose ``v=`` value is only 1 to 10 characters long.

    Extraction always fails with an expected error naming the short ID and
    the URL it came from.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Raise an expected ExtractorError reporting the incomplete ID."""
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)