]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube] Fix format sorting when using alternate clients
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
2d30521a 37 float_or_none,
11f9be09 38 format_field,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
94278f72 41 mimetype2ext,
9c0d7f49 42 network_exceptions,
11f9be09 43 orderedSet,
6310acf5 44 parse_codecs,
49bd8c66 45 parse_count,
7c80519c 46 parse_duration,
7ea65411 47 parse_iso8601,
dca3ff4a 48 qualities,
3995d37d 49 remove_start,
cf7e015f 50 smuggle_url,
dbdaaa23 51 str_or_none,
c93d53f5 52 str_to_int,
7c365c21 53 traverse_obj,
556dbe7f 54 try_get,
c5e8d7af
PH
55 unescapeHTML,
56 unified_strdate,
cf7e015f 57 unsmuggle_url,
8bdd16b4 58 update_url_query,
21c340b8 59 url_or_none,
6e6bc8da 60 urlencode_postdata,
fe93e2c4 61 urljoin,
7c365c21 62 variadic,
c5e8d7af
PH
63)
64
5f6a1245 65
def parse_qs(url):
    """Parse the query component of *url* with compat_urlparse.parse_qs."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
68
69
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google accounts sign-in endpoints
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # '{0}' is filled in with str.format() by the user of this template
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of fixed path names
    # NOTE(review): presumably names that must not be treated as channel
    # names - the users of this constant are outside this section; confirm
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches either a known prefix followed by at least 10 ID characters,
    # or one of the fixed IDs RDMM, WL, LL and LM
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 89
b2e8bc1b 90 def _login(self):
83317f69 91 """
92 Attempt to log in to YouTube.
93 True is returned if successful or skipped.
94 False is returned if login failed.
95
96 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
97 """
9d5d4d64 98
99 def warn(message):
100 self.report_warning(message)
101
102 # username+password login is broken
982ee69a
MB
103 if (self._LOGIN_REQUIRED
104 and self.get_param('cookiefile') is None
105 and self.get_param('cookiesfrombrowser') is None):
9d5d4d64 106 self.raise_login_required(
107 'Login details are needed to download this content', method='cookies')
68217024 108 username, password = self._get_login_info()
9d5d4d64 109 if username:
110 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
111 return
9d5d4d64 112
2d6659b9 113 # Everything below this is broken!
114 r'''
b2e8bc1b
JMF
115 # No authentication to be performed
116 if username is None:
a06916d9 117 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 118 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 119 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 120 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 121 return True
b2e8bc1b 122
7cc3570e
PH
123 login_page = self._download_webpage(
124 self._LOGIN_URL, None,
69ea8ca4
PH
125 note='Downloading login page',
126 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
127 if login_page is False:
128 return
b2e8bc1b 129
1212e997 130 login_form = self._hidden_inputs(login_page)
c5e8d7af 131
e00eb564
S
132 def req(url, f_req, note, errnote):
133 data = login_form.copy()
134 data.update({
135 'pstMsg': 1,
136 'checkConnection': 'youtube',
137 'checkedDomains': 'youtube',
138 'hl': 'en',
139 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 140 'f.req': json.dumps(f_req),
e00eb564
S
141 'flowName': 'GlifWebSignIn',
142 'flowEntry': 'ServiceLogin',
baf67a60
S
143 # TODO: reverse actual botguard identifier generation algo
144 'bgRequest': '["identifier",""]',
041bc3ad 145 })
e00eb564
S
146 return self._download_json(
147 url, None, note=note, errnote=errnote,
148 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
149 fatal=False,
150 data=urlencode_postdata(data), headers={
151 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
152 'Google-Accounts-XSRF': 1,
153 })
154
3995d37d
S
155 lookup_req = [
156 username,
157 None, [], None, 'US', None, None, 2, False, True,
158 [
159 None, None,
160 [2, 1, None, 1,
161 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
162 None, [], 4],
163 1, [None, None, []], None, None, None, True
164 ],
165 username,
166 ]
167
e00eb564 168 lookup_results = req(
3995d37d 169 self._LOOKUP_URL, lookup_req,
e00eb564
S
170 'Looking up account info', 'Unable to look up account info')
171
172 if lookup_results is False:
173 return False
041bc3ad 174
3995d37d
S
175 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
176 if not user_hash:
177 warn('Unable to extract user hash')
178 return False
179
180 challenge_req = [
181 user_hash,
182 None, 1, None, [1, None, None, None, [password, None, True]],
183 [
184 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
185 1, [None, None, []], None, None, None, True
186 ]]
83317f69 187
3995d37d
S
188 challenge_results = req(
189 self._CHALLENGE_URL, challenge_req,
190 'Logging in', 'Unable to log in')
83317f69 191
3995d37d 192 if challenge_results is False:
e00eb564 193 return
83317f69 194
3995d37d
S
195 login_res = try_get(challenge_results, lambda x: x[0][5], list)
196 if login_res:
197 login_msg = try_get(login_res, lambda x: x[5], compat_str)
198 warn(
199 'Unable to login: %s' % 'Invalid password'
200 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
201 return False
202
203 res = try_get(challenge_results, lambda x: x[0][-1], list)
204 if not res:
205 warn('Unable to extract result entry')
206 return False
207
9a6628aa
S
208 login_challenge = try_get(res, lambda x: x[0][0], list)
209 if login_challenge:
210 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
211 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
212 # SEND_SUCCESS - TFA code has been successfully sent to phone
213 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 214 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
215 if status == 'QUOTA_EXCEEDED':
216 warn('Exceeded the limit of TFA codes, try later')
217 return False
218
219 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
220 if not tl:
221 warn('Unable to extract TL')
222 return False
223
224 tfa_code = self._get_tfa_info('2-step verification code')
225
226 if not tfa_code:
227 warn(
228 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
229 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
230 return False
231
232 tfa_code = remove_start(tfa_code, 'G-')
233
234 tfa_req = [
235 user_hash, None, 2, None,
236 [
237 9, None, None, None, None, None, None, None,
238 [None, tfa_code, True, 2]
239 ]]
240
241 tfa_results = req(
242 self._TFA_URL.format(tl), tfa_req,
243 'Submitting TFA code', 'Unable to submit TFA code')
244
245 if tfa_results is False:
246 return False
247
248 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
249 if tfa_res:
250 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
251 warn(
252 'Unable to finish TFA: %s' % 'Invalid TFA code'
253 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
254 return False
255
256 check_cookie_url = try_get(
257 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
258 else:
259 CHALLENGES = {
260 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
261 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
262 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
263 }
264 challenge = CHALLENGES.get(
265 challenge_str,
266 '%s returned error %s.' % (self.IE_NAME, challenge_str))
267 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
268 return False
3995d37d
S
269 else:
270 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
271
272 if not check_cookie_url:
273 warn('Unable to extract CheckCookie URL')
274 return False
e00eb564
S
275
276 check_cookie_results = self._download_webpage(
3995d37d
S
277 check_cookie_url, None, 'Checking cookie', fatal=False)
278
279 if check_cookie_results is False:
280 return False
e00eb564 281
3995d37d
S
282 if 'https://myaccount.google.com/' not in check_cookie_results:
283 warn('Unable to log in')
b2e8bc1b 284 return False
e00eb564 285
b2e8bc1b 286 return True
2d6659b9 287 '''
b2e8bc1b 288
cce889b9 289 def _initialize_consent(self):
290 cookies = self._get_cookies('https://www.youtube.com/')
291 if cookies.get('__Secure-3PSID'):
292 return
293 consent_id = None
294 consent = cookies.get('CONSENT')
295 if consent:
296 if 'YES' in consent.value:
297 return
298 consent_id = self._search_regex(
299 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
300 if not consent_id:
301 consent_id = random.randint(100, 999)
302 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 303
b2e8bc1b 304 def _real_initialize(self):
cce889b9 305 self._initialize_consent()
b2e8bc1b
JMF
306 if self._downloader is None:
307 return
b2e8bc1b
JMF
308 if not self._login():
309 return
c5e8d7af 310
# Captures the object literal assigned to ytInitialData, written either as
# window["ytInitialData"] = {...}; or as ytInitialData = {...};
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
# Captures the object literal assigned to ytInitialPlayerResponse
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text expected right after the assignment; appended to the data regex so
# that the non-greedy capture is anchored at the end of the statement
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 314
109dd3b2 315 _YT_DEFAULT_YTCFGS = {
316 'WEB': {
317 'INNERTUBE_API_VERSION': 'v1',
318 'INNERTUBE_CLIENT_NAME': 'WEB',
319 'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
320 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
321 'INNERTUBE_CONTEXT': {
322 'client': {
323 'clientName': 'WEB',
324 'clientVersion': '2.20210622.10.00',
325 'hl': 'en',
326 }
327 },
328 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
329 },
330 'WEB_REMIX': {
331 'INNERTUBE_API_VERSION': 'v1',
332 'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
333 'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
334 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
335 'INNERTUBE_CONTEXT': {
336 'client': {
337 'clientName': 'WEB_REMIX',
338 'clientVersion': '1.20210621.00.00',
339 'hl': 'en',
340 }
341 },
342 'INNERTUBE_CONTEXT_CLIENT_NAME': 67
343 },
344 'WEB_EMBEDDED_PLAYER': {
345 'INNERTUBE_API_VERSION': 'v1',
346 'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
347 'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
348 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
349 'INNERTUBE_CONTEXT': {
350 'client': {
351 'clientName': 'WEB_EMBEDDED_PLAYER',
352 'clientVersion': '1.20210620.0.1',
353 'hl': 'en',
354 }
355 },
356 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
357 },
358 'ANDROID': {
359 'INNERTUBE_API_VERSION': 'v1',
360 'INNERTUBE_CLIENT_NAME': 'ANDROID',
361 'INNERTUBE_CLIENT_VERSION': '16.20',
362 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
363 'INNERTUBE_CONTEXT': {
364 'client': {
365 'clientName': 'ANDROID',
366 'clientVersion': '16.20',
367 'hl': 'en',
368 }
369 },
fe93e2c4 370 'INNERTUBE_CONTEXT_CLIENT_NAME': 3
109dd3b2 371 },
372 'ANDROID_EMBEDDED_PLAYER': {
373 'INNERTUBE_API_VERSION': 'v1',
374 'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
375 'INNERTUBE_CLIENT_VERSION': '16.20',
376 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
377 'INNERTUBE_CONTEXT': {
378 'client': {
379 'clientName': 'ANDROID_EMBEDDED_PLAYER',
380 'clientVersion': '16.20',
381 'hl': 'en',
382 }
383 },
fe93e2c4 384 'INNERTUBE_CONTEXT_CLIENT_NAME': 55
109dd3b2 385 },
386 'ANDROID_MUSIC': {
387 'INNERTUBE_API_VERSION': 'v1',
388 'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
389 'INNERTUBE_CLIENT_VERSION': '4.32',
390 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
391 'INNERTUBE_CONTEXT': {
392 'client': {
393 'clientName': 'ANDROID_MUSIC',
394 'clientVersion': '4.32',
395 'hl': 'en',
396 }
397 },
fe93e2c4 398 'INNERTUBE_CONTEXT_CLIENT_NAME': 21
11f9be09 399 },
400 'IOS': {
401 'INNERTUBE_API_VERSION': 'v1',
402 'INNERTUBE_CLIENT_NAME': 'IOS',
403 'INNERTUBE_CLIENT_VERSION': '16.20',
404 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
405 'INNERTUBE_CONTEXT': {
406 'client': {
407 'clientName': 'IOS',
408 'clientVersion': '16.20',
409 'hl': 'en',
410 }
411 },
412 'INNERTUBE_CONTEXT_CLIENT_NAME': 5
413
414 },
415 'IOS_MUSIC': {
416 'INNERTUBE_API_VERSION': 'v1',
417 'INNERTUBE_CLIENT_NAME': 'IOS_MUSIC',
418 'INNERTUBE_CLIENT_VERSION': '4.32',
419 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
420 'INNERTUBE_CONTEXT': {
421 'client': {
422 'clientName': 'IOS_MUSIC',
423 'clientVersion': '4.32',
424 'hl': 'en',
425 }
426 },
427 'INNERTUBE_CONTEXT_CLIENT_NAME': 26
428 },
429 'IOS_MESSAGES_EXTENSION': {
430 'INNERTUBE_API_VERSION': 'v1',
431 'INNERTUBE_CLIENT_NAME': 'IOS_MESSAGES_EXTENSION',
432 'INNERTUBE_CLIENT_VERSION': '16.20',
433 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
434 'INNERTUBE_CONTEXT': {
435 'client': {
436 'clientName': 'IOS_MESSAGES_EXTENSION',
437 'clientVersion': '16.20',
438 'hl': 'en',
439 }
440 },
441 'INNERTUBE_CONTEXT_CLIENT_NAME': 66
5a1fc62b 442 },
443 'MWEB': {
444 'INNERTUBE_API_VERSION': 'v1',
445 'INNERTUBE_CLIENT_NAME': 'MWEB',
446 'INNERTUBE_CLIENT_VERSION': '2.20210721.07.00',
447 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
448 'INNERTUBE_CONTEXT': {
449 'client': {
450 'clientName': 'MWEB',
451 'clientVersion': '2.20210721.07.00',
452 'hl': 'en',
453 }
454 },
455 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
456 },
109dd3b2 457 }
458
# API hostname per INNERTUBE client name. _get_innertube_host() looks a
# client up here and uses the 'WEB' entry for clients that are not listed.
_YT_DEFAULT_INNERTUBE_HOSTS = {
    'DIRECT': 'youtubei.googleapis.com',
    'WEB': 'www.youtube.com',
    'WEB_REMIX': 'music.youtube.com',
    'ANDROID_MUSIC': 'music.youtube.com'
}
465
# Maps client identifiers to INNERTUBE client names
# clients starting with _ cannot be explicitly requested by the user
_YT_CLIENTS = {
    'android': 'ANDROID',
    'android_music': 'ANDROID_MUSIC',
    '_android_embedded': 'ANDROID_EMBEDDED_PLAYER',
    '_android_agegate': 'ANDROID',
    'ios': 'IOS',
    'ios_music': 'IOS_MUSIC',
    '_ios_embedded': 'IOS_MESSAGES_EXTENSION',
    '_ios_agegate': 'IOS',
    'web': 'WEB',
    'web_music': 'WEB_REMIX',
    '_web_embedded': 'WEB_EMBEDDED_PLAYER',
    # NOTE(review): 'TVHTML5' has no entry in _YT_DEFAULT_YTCFGS, so
    # _get_default_ytcfg() falls back to the WEB defaults for it - confirm
    # that this is intended
    '_web_agegate': 'TVHTML5',
    'mobile_web': 'MWEB',
}
482
109dd3b2 483 def _get_default_ytcfg(self, client='WEB'):
484 if client in self._YT_DEFAULT_YTCFGS:
485 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
486 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
487 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
488
def _get_innertube_host(self, client='WEB'):
    """Look up the API hostname for the client, with the 'WEB' entry as fallback key."""
    lookup_order = (client, 'WEB')
    return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, lookup_order)
491
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
    """
    try_get() on ytcfg, falling back to the default ytcfg of default_client.

    The fallback is used whenever the value obtained from ytcfg is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
496
def _extract_client_name(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_NAME from ytcfg, or from the defaults of default_client."""
    return self._ytcfg_get_safe(
        ytcfg, lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        expected_type=compat_str, default_client=default_client)
499
@staticmethod
def _extract_session_index(*data):
    """Return the first SESSION_INDEX (passed through int_or_none) that is not None, else None."""
    candidates = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in candidates if index is not None), None)
314ee305 506
def _extract_client_version(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_VERSION from ytcfg, or from the defaults of default_client."""
    return self._ytcfg_get_safe(
        ytcfg, lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        expected_type=compat_str, default_client=default_client)
509
def _extract_api_key(self, ytcfg=None, default_client='WEB'):
    """Return INNERTUBE_API_KEY from ytcfg, or from the defaults of default_client."""
    return self._ytcfg_get_safe(
        ytcfg, lambda cfg: cfg['INNERTUBE_API_KEY'],
        expected_type=compat_str, default_client=default_client)
512
def _extract_context(self, ytcfg=None, default_client='WEB'):
    """
    Return the INNERTUBE context to send with API requests.

    The INNERTUBE_CONTEXT dict of ytcfg is returned as-is when present.
    Otherwise the default context of default_client is used; if a ytcfg
    was given, its client name, client version and VISITOR_DATA (when
    available) are written into that default context.
    """
    def context_of(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    own_context = context_of(ytcfg)
    if own_context:
        return own_context

    context = context_of(self._get_default_ytcfg(default_client))
    if ytcfg:
        # Recreate the client context (required)
        client = context['client']
        client.update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            client['visitorData'] = visitor_data
    return context
532
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value for origin.

    Returns None if neither a __Secure-3PAPISID nor a SAPISID cookie with
    a value is available for https://www.youtube.com.
    """
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    yt_cookies = self._get_cookies('https://www.youtube.com')
    sapisid_cookie = dict_get(yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
    if sapisid_cookie is None or not sapisid_cookie.value:
        return

    sapisid = sapisid_cookie.value
    timestamp = round(time.time())
    # SAPISID cookie is required if not already present
    if not yt_cookies.get('SAPISID'):
        self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie', only_once=True)
        self._set_cookie(
            '.youtube.com', 'SAPISID', sapisid, secure=True, expire_time=timestamp + 3600)
    self.write_debug('Extracted SAPISID cookie', only_once=True)

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    hash_input = f'{timestamp} {sapisid} {origin}'.encode('utf-8')
    digest = hashlib.sha1(hash_input).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'
a5c56234
M
552
553 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 554 note='Downloading API JSON', errnote='Unable to download API page',
109dd3b2 555 context=None, api_key=None, api_hostname=None, default_client='WEB'):
f4f751af 556
109dd3b2 557 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 558 data.update(query)
11f9be09 559 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 560 real_headers.update({'content-type': 'application/json'})
561 if headers:
562 real_headers.update(headers)
545cc85d 563 return self._download_json(
109dd3b2 564 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 565 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 566 data=json.dumps(data).encode('utf8'), headers=real_headers,
567 query={'key': api_key or self._extract_api_key()})
568
def extract_yt_initial_data(self, video_id, webpage):
    """Find the ytInitialData assignment in webpage and parse it as JSON."""
    # Try the boundary-anchored pattern first, the bare pattern second
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE)
    raw_data = self._search_regex(patterns, webpage, 'yt initial data')
    return self._parse_json(raw_data, video_id)
0c148415 575
def _extract_identity_token(self, webpage, item_id):
    """
    Return the ID_TOKEN for webpage, or None.

    The token is taken from the ytcfg of the page when present there,
    otherwise it is searched for in the raw page. An empty webpage
    yields None.
    """
    if not webpage:
        return None
    ytcfg = self.extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    return token or self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
587
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the first sync id found among the arguments, or None
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'])
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split("||")
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
a1c5d2ca 606
def extract_ytcfg(self, video_id, webpage):
    """
    Parse the argument of the 'ytcfg.set({...});' call found in webpage.

    Returns {} for an empty webpage, when no such call is found, or when
    parsing yields a falsy result.
    """
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_ytcfg, video_id, fatal=False)
    return parsed or {}
614
def generate_api_headers(
        self, ytcfg=None, identity_token=None, account_syncid=None,
        visitor_data=None, api_hostname=None, default_client='WEB', session_index=None):
    """
    Build the HTTP headers for an INNERTUBE API request.

    Client name and version come from ytcfg with the defaults of
    default_client as fallback. The origin is built from api_hostname,
    or from the host of default_client when no hostname is given.
    Identity token, page id, auth user, visitor id and the SAPISIDHASH
    authorization are only added when the respective value is available.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(default_client))
    client_name_id = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name_id),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin
    }

    if ytcfg and not visitor_data:
        visitor_data = try_get(
            self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid

    if ytcfg and session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # With an account_syncid but no known session index, 0 is sent
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index
    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return headers
29f7c58a 643
2d6659b9 644 @staticmethod
645 def _build_api_continuation_query(continuation, ctp=None):
646 query = {
647 'continuation': continuation
648 }
649 # TODO: Inconsistency with clickTrackingParams.
650 # Currently we have a fixed ctp contained within context (from ytcfg)
651 # and a ctp in root query for continuation.
652 if ctp:
653 query['clickTracking'] = {'clickTrackingParams': ctp}
654 return query
655
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the nextContinuationData or
    reloadContinuationData of renderer. Returns None if there is none.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
2d6659b9 668
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from the continuationCommand token of a
    continuation endpoint. Returns None for non-dicts or a missing token.
    """
    if not isinstance(continuation_ep, dict):
        return
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if token:
        return cls._build_api_continuation_query(
            token, continuation_ep.get('clickTrackingParams'))
2d6659b9 678
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for renderer, or None.

    The continuation data of the renderer itself is preferred; otherwise
    the first usable continuationItemRenderer among its 'contents' and
    'items' is used.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    for key in ('contents', 'items'):
        for entry in try_get(renderer, lambda x: x[key], list) or []:
            if not isinstance(entry, dict):
                continue
            endpoint = try_get(
                entry, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
                dict)
            found = cls._extract_continuation_ep_data(endpoint)
            if found:
                return found
699
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for every alert in data['alerts'].

    Entries that are not dicts, and alerts without a type or without
    any text, are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A malformed (non-dict) alert must not abort the extraction
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
712
713 def _report_alerts(self, alerts, expected=True):
714 errors = []
715 warnings = []
716 for alert_type, alert_message in alerts:
717 if alert_type.lower() == 'error':
718 errors.append([alert_type, alert_message])
719 else:
720 warnings.append([alert_type, alert_message])
721
722 for alert_type, alert_message in (warnings + errors[:-1]):
723 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
724 if errors:
725 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
726
727 def _extract_and_report_alerts(self, data, *args, **kwargs):
728 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
729
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels of renderer."""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}
737
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in data, or None.

    @param data       object to read the text from
    @param path_list  traverse_obj paths tried in order; without any path,
                      data itself is examined
    @param max_runs   if set, only this many leading runs are joined
    """
    for path in path_list or [None]:
        if path is None:
            obj = [data]
        else:
            obj = traverse_obj(data, path, default=[])
            # A path without ... or list/tuple keys is wrapped so that the
            # loop below always iterates over candidate items
            # NOTE(review): presumably such a path yields a single object
            # rather than a list of matches - confirm against traverse_obj
            if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                obj = [obj]
        for item in obj:
            text = try_get(item, lambda x: x['simpleText'], compat_str)
            if text:
                return text
            runs = try_get(item, lambda x: x['runs'], list) or []
            # An item that is a list itself is treated as the list of runs
            if not runs and isinstance(item, list):
                runs = item

            # Keep all runs when max_runs is unset (or 0)
            runs = runs[:min(len(runs), max_runs or len(runs))]
            text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if text:
                return text
47193e02 759
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='WEB'):
    """
    Call the API through _call_api(), retrying on failure.

    Up to 'extractor_retries' (default 3) retries are made after the first
    attempt. A retry happens on network errors other than HTTP 403/429 and
    when the response has no truthy value for any of check_get_keys.

    @param check_get_keys  keys of which at least one must hold data for
                           the response to count as complete
    @param fatal           if False, failures are reported as warnings and
                           None is returned instead of raising
    @returns               the API response, or None on non-fatal failure

    The remaining parameters are passed on to _call_api() / used to build
    its context and API key.
    """
    response = None
    last_error = None
    # Starts at -1 so that the first attempt is number 0
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            # fatal=True here: failures are handled by the except clause below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e)
                    if count < retries:
                        continue
            # Not retryable, or no attempts left
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
818
9297939e 819 @staticmethod
820 def is_music_url(url):
821 return re.match(r'https?://music\.youtube\.com/', url) is not None
822
def _extract_video(self, renderer):
    """Build a 'url' result for YoutubeIE from the fields of a video renderer."""
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(duration_text)

    # Only the leading digits/commas of the whitespace-stripped text are used
    view_count_text = self._get_text(renderer, 'viewCountText') or ''
    compact_view_count = re.sub(r'\s', '', view_count_text)
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', compact_view_count, 'view count', default=None))

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
847
0c148415 848
360e1ca5 849class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 850 IE_DESC = 'YouTube.com'
bc2ca1bb 851 _INVIDIOUS_SITES = (
852 # invidious-redirect websites
853 r'(?:www\.)?redirect\.invidious\.io',
854 r'(?:(?:www|dev)\.)?invidio\.us',
855 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
856 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 857 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 858 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 859 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 860 # youtube-dl invidious instances list
861 r'(?:(?:www|no)\.)?invidiou\.sh',
862 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
863 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 864 r'(?:www\.)?invidious\.mastodon\.host',
865 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 866 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 867 r'(?:www\.)?invidious\.tinfoil-hat\.net',
868 r'(?:www\.)?invidious\.himiko\.cloud',
869 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 870 r'(?:www\.)?invidious\.tube',
871 r'(?:www\.)?invidiou\.site',
872 r'(?:www\.)?invidious\.site',
873 r'(?:www\.)?invidious\.xyz',
874 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 875 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 876 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 877 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 878 r'(?:www\.)?tube\.poal\.co',
879 r'(?:www\.)?tube\.connect\.cafe',
880 r'(?:www\.)?vid\.wxzm\.sx',
881 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 882 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 883 r'(?:www\.)?yewtu\.be',
884 r'(?:www\.)?yt\.elukerio\.org',
885 r'(?:www\.)?yt\.lelux\.fi',
886 r'(?:www\.)?invidious\.ggc-project\.de',
887 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 888 r'(?:www\.)?ytprivate\.com',
889 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 890 r'(?:www\.)?invidious\.toot\.koeln',
891 r'(?:www\.)?invidious\.fdn\.fr',
892 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 893 r'(?:www\.)?invidious\.namazso\.eu',
894 r'(?:www\.)?invidious\.silkky\.cloud',
895 r'(?:www\.)?invidious\.exonip\.de',
896 r'(?:www\.)?invidious\.riverside\.rocks',
897 r'(?:www\.)?invidious\.blamefran\.net',
898 r'(?:www\.)?invidious\.moomoo\.de',
899 r'(?:www\.)?ytb\.trom\.tf',
900 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 901 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
902 r'(?:www\.)?qklhadlycap4cnod\.onion',
903 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
904 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
905 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
906 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
907 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
908 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 909 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
910 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
911 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
912 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 913 )
cb7dfeea 914 _VALID_URL = r"""(?x)^
c5e8d7af 915 (
edb53e2d 916 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 917 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
918 (?:www\.)?deturl\.com/www\.youtube\.com|
919 (?:www\.)?pwnyoutube\.com|
920 (?:www\.)?hooktube\.com|
921 (?:www\.)?yourepeat\.com|
922 tube\.majestyc\.net|
923 %(invidious)s|
924 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
925 (?:.*?\#/)? # handle anchor (#/) redirect urls
926 (?: # the various things that can precede the ID:
ac7553d0 927 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 928 |(?: # or the v= param in all its forms
f7000f3a 929 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 930 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 931 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
932 v=
933 )
f4b05232 934 ))
cbaed4bb
S
935 |(?:
936 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
937 vid\.plus| # or vid.plus/xxxx
938 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 939 %(invidious)s
cbaed4bb 940 )/
edb53e2d 941 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 942 )
c5e8d7af 943 )? # all until now is optional -> you can pass the naked ID
201c1459 944 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 945 (?(1).+)? # if we found the ID, everything can follow
9297939e 946 (?:\#|$)""" % {
bc2ca1bb 947 'invidious': '|'.join(_INVIDIOUS_SITES),
948 }
e40c758c 949 _PLAYER_INFO_RE = (
cc2db878 950 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
951 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 952 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 953 )
2c62dc26 954 _formats = {
c2d3cb4c 955 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
956 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
957 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
958 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
959 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
960 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
961 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
962 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 963 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 964 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
965 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
966 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
967 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
968 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
969 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 970 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 971 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
972 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 973
974
975 # 3D videos
c2d3cb4c 976 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
977 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
978 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
979 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 980 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
981 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
982 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 983
96fb5605 984 # Apple HTTP Live Streaming
11f12195 985 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 986 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
987 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
988 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
989 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
990 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 991 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
992 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
993
994 # DASH mp4 video
d23028a8
S
995 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
996 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
997 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
998 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
999 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 1000 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
1001 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1002 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1003 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1004 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1005 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1006 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1007
f6f1fc92 1008 # Dash mp4 audio
d23028a8
S
1009 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1010 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1011 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1012 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1013 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1014 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1015 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1016
1017 # Dash webm
d23028a8
S
1018 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1019 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1020 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1021 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1022 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1023 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1024 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1025 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1026 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1027 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1028 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1029 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1030 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1031 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1032 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1033 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1034 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1035 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1036 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1037 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1038 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1039 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1040
1041 # Dash webm audio
d23028a8
S
1042 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1043 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1044
0857baad 1045 # Dash webm audio with opus inside
d23028a8
S
1046 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1047 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1048 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1049
ce6b9a2d
PH
1050 # RTMP (unnamed)
1051 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1052
1053 # av01 video only formats sometimes served with "unknown" codecs
1054 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1055 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1056 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1057 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 1058 }
29f7c58a 1059 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1060
109dd3b2 1061 _AGE_GATE_REASONS = (
1062 'Sign in to confirm your age',
1063 'This video may be inappropriate for some users.',
1064 'Sorry, this content is age-restricted.')
1065
fd5c4aab
S
1066 _GEO_BYPASS = False
1067
78caa52a 1068 IE_NAME = 'youtube'
2eb88d95
PH
1069 _TESTS = [
1070 {
2d3d2997 1071 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1072 'info_dict': {
1073 'id': 'BaW_jenozKc',
1074 'ext': 'mp4',
3867038a 1075 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1076 'uploader': 'Philipp Hagemeister',
1077 'uploader_id': 'phihag',
ec85ded8 1078 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
1079 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1080 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1081 'upload_date': '20121002',
3867038a 1082 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 1083 'categories': ['Science & Technology'],
3867038a 1084 'tags': ['youtube-dl'],
556dbe7f 1085 'duration': 10,
dbdaaa23 1086 'view_count': int,
3e7c1224
PH
1087 'like_count': int,
1088 'dislike_count': int,
7c80519c 1089 'start_time': 1,
297a564b 1090 'end_time': 9,
2eb88d95 1091 }
0e853ca4 1092 },
fccd3771 1093 {
4bc3a23e
PH
1094 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1095 'note': 'Embed-only video (#1746)',
1096 'info_dict': {
1097 'id': 'yZIXLfi8CZQ',
1098 'ext': 'mp4',
1099 'upload_date': '20120608',
1100 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1101 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1102 'uploader': 'SET India',
94bfcd23 1103 'uploader_id': 'setindia',
ec85ded8 1104 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1105 'age_limit': 18,
545cc85d 1106 },
1107 'skip': 'Private video',
fccd3771 1108 },
11b56058 1109 {
8bdd16b4 1110 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1111 'note': 'Use the first video ID in the URL',
1112 'info_dict': {
1113 'id': 'BaW_jenozKc',
1114 'ext': 'mp4',
3867038a 1115 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1116 'uploader': 'Philipp Hagemeister',
1117 'uploader_id': 'phihag',
ec85ded8 1118 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1119 'upload_date': '20121002',
3867038a 1120 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1121 'categories': ['Science & Technology'],
3867038a 1122 'tags': ['youtube-dl'],
556dbe7f 1123 'duration': 10,
dbdaaa23 1124 'view_count': int,
11b56058
PM
1125 'like_count': int,
1126 'dislike_count': int,
34a7de29
S
1127 },
1128 'params': {
1129 'skip_download': True,
1130 },
11b56058 1131 },
dd27fd17 1132 {
2d3d2997 1133 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1134 'note': '256k DASH audio (format 141) via DASH manifest',
1135 'info_dict': {
1136 'id': 'a9LDPn-MO4I',
1137 'ext': 'm4a',
1138 'upload_date': '20121002',
1139 'uploader_id': '8KVIDEO',
ec85ded8 1140 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1141 'description': '',
1142 'uploader': '8KVIDEO',
1143 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1144 },
4bc3a23e
PH
1145 'params': {
1146 'youtube_include_dash_manifest': True,
1147 'format': '141',
4919603f 1148 },
de3c7fe0 1149 'skip': 'format 141 not served anymore',
dd27fd17 1150 },
8bdd16b4 1151 # DASH manifest with encrypted signature
1152 {
1153 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1154 'info_dict': {
1155 'id': 'IB3lcPjvWLA',
1156 'ext': 'm4a',
1157 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1158 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1159 'duration': 244,
1160 'uploader': 'AfrojackVEVO',
1161 'uploader_id': 'AfrojackVEVO',
1162 'upload_date': '20131011',
cc2db878 1163 'abr': 129.495,
8bdd16b4 1164 },
1165 'params': {
1166 'youtube_include_dash_manifest': True,
1167 'format': '141/bestaudio[ext=m4a]',
1168 },
1169 },
dd2d55f1 1170 # Normal age-gate video (embed allowed)
c522adb1 1171 {
2d3d2997 1172 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1173 'info_dict': {
1174 'id': 'HtVdAasjOgU',
1175 'ext': 'mp4',
1176 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1177 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1178 'duration': 142,
c522adb1
JMF
1179 'uploader': 'The Witcher',
1180 'uploader_id': 'WitcherGame',
ec85ded8 1181 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1182 'upload_date': '20140605',
34952f09 1183 'age_limit': 18,
c522adb1
JMF
1184 },
1185 },
8bdd16b4 1186 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1187 # YouTube Red ad is not captured for creator
1188 {
1189 'url': '__2ABJjxzNo',
1190 'info_dict': {
1191 'id': '__2ABJjxzNo',
1192 'ext': 'mp4',
1193 'duration': 266,
1194 'upload_date': '20100430',
1195 'uploader_id': 'deadmau5',
1196 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1197 'creator': 'deadmau5',
1198 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1199 'uploader': 'deadmau5',
1200 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1201 'alt_title': 'Some Chords',
8bdd16b4 1202 },
1203 'expected_warnings': [
1204 'DASH manifest missing',
1205 ]
1206 },
067aa17e 1207 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1208 {
1209 'url': 'lqQg6PlCWgI',
1210 'info_dict': {
1211 'id': 'lqQg6PlCWgI',
1212 'ext': 'mp4',
556dbe7f 1213 'duration': 6085,
90227264 1214 'upload_date': '20150827',
cbe2bd91 1215 'uploader_id': 'olympic',
ec85ded8 1216 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1217 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1218 'uploader': 'Olympics',
cbe2bd91
PH
1219 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1220 },
1221 'params': {
1222 'skip_download': 'requires avconv',
e52a40ab 1223 }
cbe2bd91 1224 },
6271f1ca
PH
1225 # Non-square pixels
1226 {
1227 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1228 'info_dict': {
1229 'id': '_b-2C3KPAM0',
1230 'ext': 'mp4',
1231 'stretched_ratio': 16 / 9.,
556dbe7f 1232 'duration': 85,
6271f1ca
PH
1233 'upload_date': '20110310',
1234 'uploader_id': 'AllenMeow',
ec85ded8 1235 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1236 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1237 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1238 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1239 },
06b491eb
S
1240 },
1241 # url_encoded_fmt_stream_map is empty string
1242 {
1243 'url': 'qEJwOuvDf7I',
1244 'info_dict': {
1245 'id': 'qEJwOuvDf7I',
f57b7835 1246 'ext': 'webm',
06b491eb
S
1247 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1248 'description': '',
1249 'upload_date': '20150404',
1250 'uploader_id': 'spbelect',
1251 'uploader': 'Наблюдатели Петербурга',
1252 },
1253 'params': {
1254 'skip_download': 'requires avconv',
e323cf3f
S
1255 },
1256 'skip': 'This live event has ended.',
06b491eb 1257 },
067aa17e 1258 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1259 {
1260 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1261 'info_dict': {
1262 'id': 'FIl7x6_3R5Y',
eb6793ba 1263 'ext': 'webm',
da77d856
S
1264 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1265 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1266 'duration': 220,
da77d856
S
1267 'upload_date': '20150625',
1268 'uploader_id': 'dorappi2000',
ec85ded8 1269 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1270 'uploader': 'dorappi2000',
eb6793ba 1271 'formats': 'mincount:31',
da77d856 1272 },
eb6793ba 1273 'skip': 'not actual anymore',
2ee8f5d8 1274 },
8a1a26ce
YCH
1275 # DASH manifest with segment_list
1276 {
1277 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1278 'md5': '8ce563a1d667b599d21064e982ab9e31',
1279 'info_dict': {
1280 'id': 'CsmdDsKjzN8',
1281 'ext': 'mp4',
17ee98e1 1282 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1283 'uploader': 'Airtek',
1284 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1285 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1286 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1287 },
1288 'params': {
1289 'youtube_include_dash_manifest': True,
1290 'format': '135', # bestvideo
be49068d
S
1291 },
1292 'skip': 'This live event has ended.',
2ee8f5d8 1293 },
cf7e015f
S
1294 {
1295 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1296 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1297 'info_dict': {
545cc85d 1298 'id': 'jvGDaLqkpTg',
1299 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1300 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1301 },
1302 'playlist': [{
1303 'info_dict': {
545cc85d 1304 'id': 'jvGDaLqkpTg',
cf7e015f 1305 'ext': 'mp4',
545cc85d 1306 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1307 'description': 'md5:e03b909557865076822aa169218d6a5d',
1308 'duration': 10643,
1309 'upload_date': '20161111',
1310 'uploader': 'Team PGP',
1311 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1312 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1313 },
1314 }, {
1315 'info_dict': {
545cc85d 1316 'id': '3AKt1R1aDnw',
cf7e015f 1317 'ext': 'mp4',
545cc85d 1318 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1319 'description': 'md5:e03b909557865076822aa169218d6a5d',
1320 'duration': 10991,
1321 'upload_date': '20161111',
1322 'uploader': 'Team PGP',
1323 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1324 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1325 },
1326 }, {
1327 'info_dict': {
545cc85d 1328 'id': 'RtAMM00gpVc',
cf7e015f 1329 'ext': 'mp4',
545cc85d 1330 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1331 'description': 'md5:e03b909557865076822aa169218d6a5d',
1332 'duration': 10995,
1333 'upload_date': '20161111',
1334 'uploader': 'Team PGP',
1335 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1336 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1337 },
1338 }, {
1339 'info_dict': {
545cc85d 1340 'id': '6N2fdlP3C5U',
cf7e015f 1341 'ext': 'mp4',
545cc85d 1342 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1343 'description': 'md5:e03b909557865076822aa169218d6a5d',
1344 'duration': 10990,
1345 'upload_date': '20161111',
1346 'uploader': 'Team PGP',
1347 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1348 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1349 },
1350 }],
1351 'params': {
1352 'skip_download': True,
1353 },
cbaed4bb 1354 },
f9f49d87 1355 {
067aa17e 1356 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1357 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1358 'info_dict': {
1359 'id': 'gVfLd0zydlo',
1360 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1361 },
1362 'playlist_count': 2,
be49068d 1363 'skip': 'Not multifeed anymore',
f9f49d87 1364 },
cbaed4bb 1365 {
2d3d2997 1366 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1367 'only_matching': True,
0e49d9a6 1368 },
6d4fc66b 1369 {
2d3d2997 1370 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1371 'only_matching': True,
1372 },
0e49d9a6 1373 {
067aa17e 1374 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1375 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1376 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1377 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1378 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1379 'info_dict': {
1380 'id': 'lsguqyKfVQg',
1381 'ext': 'mp4',
1382 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1383 'alt_title': 'Dark Walk',
0e49d9a6 1384 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1385 'duration': 133,
0e49d9a6
LL
1386 'upload_date': '20151119',
1387 'uploader_id': 'IronSoulElf',
ec85ded8 1388 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1389 'uploader': 'IronSoulElf',
11f9be09 1390 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1391 'track': 'Dark Walk',
1392 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1393 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1394 },
1395 'params': {
1396 'skip_download': True,
1397 },
1398 },
61f92af1 1399 {
067aa17e 1400 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1401 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1402 'only_matching': True,
1403 },
313dfc45
LL
1404 {
1405 # Video with yt:stretch=17:0
1406 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1407 'info_dict': {
1408 'id': 'Q39EVAstoRM',
1409 'ext': 'mp4',
1410 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1411 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1412 'upload_date': '20151107',
1413 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1414 'uploader': 'CH GAMER DROID',
1415 },
1416 'params': {
1417 'skip_download': True,
1418 },
be49068d 1419 'skip': 'This video does not exist.',
313dfc45 1420 },
201c1459 1421 {
1422 # Video with incomplete 'yt:stretch=16:'
1423 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1424 'only_matching': True,
1425 },
7caf9830
S
1426 {
1427 # Video licensed under Creative Commons
1428 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1429 'info_dict': {
1430 'id': 'M4gD1WSo5mA',
1431 'ext': 'mp4',
1432 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1433 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1434 'duration': 721,
7caf9830
S
1435 'upload_date': '20150127',
1436 'uploader_id': 'BerkmanCenter',
ec85ded8 1437 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1438 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1439 'license': 'Creative Commons Attribution license (reuse allowed)',
1440 },
1441 'params': {
1442 'skip_download': True,
1443 },
1444 },
fd050249
S
1445 {
1446 # Channel-like uploader_url
1447 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1448 'info_dict': {
1449 'id': 'eQcmzGIKrzg',
1450 'ext': 'mp4',
1451 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1452 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1453 'duration': 4060,
fd050249 1454 'upload_date': '20151119',
eb6793ba 1455 'uploader': 'Bernie Sanders',
fd050249 1456 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1457 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1458 'license': 'Creative Commons Attribution license (reuse allowed)',
1459 },
1460 'params': {
1461 'skip_download': True,
1462 },
1463 },
040ac686
S
1464 {
1465 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1466 'only_matching': True,
7f29cf54
S
1467 },
1468 {
067aa17e 1469 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1470 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1471 'only_matching': True,
6496ccb4
S
1472 },
1473 {
1474 # Rental video preview
1475 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1476 'info_dict': {
1477 'id': 'uGpuVWrhIzE',
1478 'ext': 'mp4',
1479 'title': 'Piku - Trailer',
1480 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1481 'upload_date': '20150811',
1482 'uploader': 'FlixMatrix',
1483 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1484 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1485 'license': 'Standard YouTube License',
1486 },
1487 'params': {
1488 'skip_download': True,
1489 },
eb6793ba 1490 'skip': 'This video is not available.',
022a5d66 1491 },
12afdc2a
S
1492 {
1493 # YouTube Red video with episode data
1494 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1495 'info_dict': {
1496 'id': 'iqKdEhx-dD4',
1497 'ext': 'mp4',
1498 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1499 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1500 'duration': 2085,
12afdc2a
S
1501 'upload_date': '20170118',
1502 'uploader': 'Vsauce',
1503 'uploader_id': 'Vsauce',
1504 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1505 'series': 'Mind Field',
1506 'season_number': 1,
1507 'episode_number': 1,
1508 },
1509 'params': {
1510 'skip_download': True,
1511 },
1512 'expected_warnings': [
1513 'Skipping DASH manifest',
1514 ],
1515 },
c7121fa7
S
1516 {
1517 # The following content has been identified by the YouTube community
1518 # as inappropriate or offensive to some audiences.
1519 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1520 'info_dict': {
1521 'id': '6SJNVb0GnPI',
1522 'ext': 'mp4',
1523 'title': 'Race Differences in Intelligence',
1524 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1525 'duration': 965,
1526 'upload_date': '20140124',
1527 'uploader': 'New Century Foundation',
1528 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1529 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1530 },
1531 'params': {
1532 'skip_download': True,
1533 },
545cc85d 1534 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1535 },
022a5d66
S
1536 {
1537 # itag 212
1538 'url': '1t24XAntNCY',
1539 'only_matching': True,
fd5c4aab
S
1540 },
1541 {
1542 # geo restricted to JP
1543 'url': 'sJL6WA-aGkQ',
1544 'only_matching': True,
1545 },
cd5a74a2
S
1546 {
1547 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1548 'only_matching': True,
1549 },
bc2ca1bb 1550 {
1551 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1552 'only_matching': True,
1553 },
1554 {
1555 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1556 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1557 'only_matching': True,
1558 },
825cd268
RA
1559 {
1560 # DRM protected
1561 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1562 'only_matching': True,
4fe54c12
S
1563 },
1564 {
1565 # Video with unsupported adaptive stream type formats
1566 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1567 'info_dict': {
1568 'id': 'Z4Vy8R84T1U',
1569 'ext': 'mp4',
1570 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1571 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1572 'duration': 433,
1573 'upload_date': '20130923',
1574 'uploader': 'Amelia Putri Harwita',
1575 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1576 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1577 'formats': 'maxcount:10',
1578 },
1579 'params': {
1580 'skip_download': True,
1581 'youtube_include_dash_manifest': False,
1582 },
5429d6a9 1583 'skip': 'not actual anymore',
5caabd3c 1584 },
1585 {
822b9d9c 1586 # Youtube Music Auto-generated description
5caabd3c 1587 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1588 'info_dict': {
1589 'id': 'MgNrAu2pzNs',
1590 'ext': 'mp4',
1591 'title': 'Voyeur Girl',
1592 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1593 'upload_date': '20190312',
5429d6a9
S
1594 'uploader': 'Stephen - Topic',
1595 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1596 'artist': 'Stephen',
1597 'track': 'Voyeur Girl',
1598 'album': 'it\'s too much love to know my dear',
1599 'release_date': '20190313',
1600 'release_year': 2019,
1601 },
1602 'params': {
1603 'skip_download': True,
1604 },
1605 },
66b48727
RA
1606 {
1607 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1608 'only_matching': True,
1609 },
011e75e6
S
1610 {
1611 # invalid -> valid video id redirection
1612 'url': 'DJztXj2GPfl',
1613 'info_dict': {
1614 'id': 'DJztXj2GPfk',
1615 'ext': 'mp4',
1616 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1617 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1618 'upload_date': '20090125',
1619 'uploader': 'Prochorowka',
1620 'uploader_id': 'Prochorowka',
1621 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1622 'artist': 'Panjabi MC',
1623 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1624 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1625 },
1626 'params': {
1627 'skip_download': True,
1628 },
545cc85d 1629 'skip': 'Video unavailable',
ea74e00b
DP
1630 },
1631 {
1632 # empty description results in an empty string
1633 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1634 'info_dict': {
1635 'id': 'x41yOUIvK2k',
1636 'ext': 'mp4',
1637 'title': 'IMG 3456',
1638 'description': '',
1639 'upload_date': '20170613',
1640 'uploader_id': 'ElevageOrVert',
1641 'uploader': 'ElevageOrVert',
1642 },
1643 'params': {
1644 'skip_download': True,
1645 },
1646 },
a0566bbf 1647 {
29f7c58a 1648 # with '};' inside yt initial data (see [1])
1649 # see [2] for an example with '};' inside ytInitialPlayerResponse
1650 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1651 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1652 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1653 'info_dict': {
1654 'id': 'CHqg6qOn4no',
1655 'ext': 'mp4',
1656 'title': 'Part 77 Sort a list of simple types in c#',
1657 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1658 'upload_date': '20130831',
1659 'uploader_id': 'kudvenkat',
1660 'uploader': 'kudvenkat',
1661 },
1662 'params': {
1663 'skip_download': True,
1664 },
1665 },
29f7c58a 1666 {
1667 # another example of '};' in ytInitialData
1668 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1669 'only_matching': True,
1670 },
1671 {
1672 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1673 'only_matching': True,
1674 },
545cc85d 1675 {
cc2db878 1676 # https://github.com/ytdl-org/youtube-dl/pull/28094
1677 'url': 'OtqTfy26tG0',
1678 'info_dict': {
1679 'id': 'OtqTfy26tG0',
1680 'ext': 'mp4',
1681 'title': 'Burn Out',
1682 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1683 'upload_date': '20141120',
1684 'uploader': 'The Cinematic Orchestra - Topic',
1685 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1686 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1687 'artist': 'The Cinematic Orchestra',
1688 'track': 'Burn Out',
1689 'album': 'Every Day',
1690 'release_data': None,
1691 'release_year': None,
1692 },
1693 'params': {
1694 'skip_download': True,
1695 },
545cc85d 1696 },
bc2ca1bb 1697 {
1698 # controversial video, only works with bpctr when authenticated with cookies
1699 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1700 'only_matching': True,
1701 },
a1a7907b 1702 {
1703 # controversial video, requires bpctr/contentCheckOk
1704 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1705 'info_dict': {
1706 'id': 'SZJvDhaSDnc',
1707 'ext': 'mp4',
1708 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1709 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1710 'uploader': 'CBS This Morning',
11f9be09 1711 'uploader_id': 'CBSThisMorning',
a1a7907b 1712 'upload_date': '20140716',
1713 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1714 }
1715 },
f7ad7160 1716 {
1717 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1718 'url': 'cBvYw8_A0vQ',
1719 'info_dict': {
1720 'id': 'cBvYw8_A0vQ',
1721 'ext': 'mp4',
1722 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1723 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1724 'upload_date': '20201120',
1725 'uploader': 'Walk around Japan',
1726 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1727 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1728 },
1729 'params': {
1730 'skip_download': True,
1731 },
0fb983f6 1732 }, {
1733 # Has multiple audio streams
1734 'url': 'WaOKSUlf4TM',
1735 'only_matching': True
9297939e 1736 }, {
1737 # Requires Premium: has format 141 when requested using YTM url
1738 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1739 'only_matching': True
1740 }, {
120916da 1741 # multiple subtitles with same lang_code
1742 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1743 'only_matching': True,
109dd3b2 1744 }, {
1745 # Force use android client fallback
1746 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1747 'info_dict': {
1748 'id': 'YOelRv7fMxY',
11f9be09 1749 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1750 'ext': '3gp',
1751 'upload_date': '20210624',
1752 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1753 'uploader': 'colinfurze',
11f9be09 1754 'uploader_id': 'colinfurze',
109dd3b2 1755 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1756 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1757 },
1758 'params': {
1759 'format': '17', # 3gp format available on android
1760 'extractor_args': {'youtube': {'player_client': ['android']}},
1761 },
120916da 1762 },
109dd3b2 1763 {
1764 # Skip download of additional client configs (remix client config in this case)
1765 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1766 'only_matching': True,
1767 'params': {
1768 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1769 },
1770 }
2eb88d95
PH
1771 ]
1772
@classmethod
def suitable(cls, url):
    """Report whether this extractor accepts *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected outright; every other URL is judged by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1782
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent and create empty caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Keyed by player id; filled by _load_player with the player source.
    self._code_cache = {}
    # Keyed by (player_url, signature shape); filled by _decrypt_signature.
    self._player_cache = {}
e0df6211 1787
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return the absolute player JS URL, or None if it cannot be found.

    The URL is read from ``ytcfg['PLAYER_JS_URL']`` first; if that yields
    nothing and *webpage* is given, the page is searched for it instead.
    """
    url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if webpage and not url:
        url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not url:
        return None
    # Protocol-relative URL: only the scheme is missing.
    if url.startswith('//'):
        return 'https:' + url
    if re.match(r'https?://', url):
        return url
    # Anything else is resolved against the YouTube origin.
    return compat_urlparse.urljoin('https://www.youtube.com', url)
1802
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # The id is the dot-joined lengths of the signature's dot-separated parts.
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1806
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id found in *player_url*.

    The patterns in ``cls._PLAYER_INFO_RE`` are tried in order and the
    ``id`` group of the first match is returned.

    Raises ExtractorError when no pattern matches.
    """
    # Lazy generator: searching stops at the first pattern that matches.
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_m = next((match for match in attempts if match), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c 1816
109dd3b2 1817 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1818 player_id = self._extract_player_info(player_url)
1819 if player_id not in self._code_cache:
1820 self._code_cache[player_id] = self._download_webpage(
1821 player_url, video_id, fatal=fatal,
1822 note='Downloading player ' + player_id,
1823 errnote='Download of %s failed' % player_url)
1824 return player_id in self._code_cache
1825
e40c758c 1826 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1827 player_id = self._extract_player_info(player_url)
e0df6211 1828
c4417ddb 1829 # Read from filesystem cache
545cc85d 1830 func_id = 'js_%s_%s' % (
1831 player_id, self._signature_cache_id(example_sig))
c4417ddb 1832 assert os.path.basename(func_id) == func_id
a0e07d31 1833
69ea8ca4 1834 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1835 if cache_spec is not None:
78caa52a 1836 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1837
109dd3b2 1838 if self._load_player(video_id, player_url):
1839 code = self._code_cache[player_id]
1840 res = self._parse_sig_js(code)
e0df6211 1841
109dd3b2 1842 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1843 cache_res = res(test_string)
1844 cache_spec = [ord(c) for c in cache_res]
83799698 1845
109dd3b2 1846 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1847 return res
83799698 1848
def _print_sig_code(self, func, example_sig):
    """Print Python code that reproduces what *func* does to a signature.

    *func* is run on a probe string as long as *example_sig*; the resulting
    index permutation is written as a sum of index and slice expressions on
    ``s`` and shown via ``self.to_screen``.
    """
    def _slice_expr(first, last, stride):
        # Render the run first..last (inclusive) as a slice of ``s``.
        lower = '' if first == 0 else str(first)
        stop = last + stride
        upper = (':%d' % stop) if stop >= 0 else ':'
        stride_part = '' if stride == 1 else (':%d' % stride)
        return 's[%s%s%s]' % (lower, upper, stride_part)

    def _index_exprs(indices):
        stride = None
        # Squelch pyflakes warnings - run_start is set whenever stride is set
        run_start = '(Never used)'
        for previous, current in zip(indices[:-1], indices[1:]):
            delta = current - previous
            if stride is None:
                if delta in (-1, 1):
                    # A run of consecutive indices begins here.
                    stride, run_start = delta, previous
                else:
                    yield 's[%d]' % previous
            elif delta != stride:
                # The run ended at the previous index.
                yield _slice_expr(run_start, previous, stride)
                stride = None
        if stride is None:
            yield 's[%d]' % current
        else:
            yield _slice_expr(run_start, current, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(_index_exprs(index_spec))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1887
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable wrapping the signature function found in *jscode*.

    The function name is located with the regexes below (tried in order by
    ``_search_regex``), then extracted with JSInterpreter. The returned
    callable takes the signature string and passes it as the single argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(')
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature(s):
        # The extracted function expects its arguments as a list.
        return initial_function([s])
    return run_signature
1911
545cc85d 1912 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1913 """Turn the encrypted s field into a working signature"""
6b37f0be 1914
c8bf86d5 1915 if player_url is None:
69ea8ca4 1916 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1917
c8bf86d5 1918 try:
62af3a0e 1919 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1920 if player_id not in self._player_cache:
1921 func = self._extract_signature_function(
60064c53 1922 video_id, player_url, s
c8bf86d5
PH
1923 )
1924 self._player_cache[player_id] = func
1925 func = self._player_cache[player_id]
a06916d9 1926 if self.get_param('youtube_print_sig_code'):
60064c53 1927 self._print_sig_code(func, s)
c8bf86d5
PH
1928 return func(s)
1929 except Exception as e:
1930 tb = traceback.format_exc()
1931 raise ExtractorError(
78caa52a 1932 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1933
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ``ytcfg['STS']`` is used when it gives a truthy value; otherwise the
    value is searched for in the player code. Without a player URL this
    raises ExtractorError if *fatal*, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return
    if self._load_player(video_id, player_url, fatal=fatal):
        code = self._code_cache[self._extract_player_info(player_url)]
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts
1958
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL found in *player_responses*.

    Emits a warning and does nothing else when no such URL is present.
    The request itself is non-fatal.
    """
    playback_url = traverse_obj(
        player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1984
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embed URLs / video ids found in *webpage*.

    Three sources are collected, in this order: embedded players,
    lazyYT elements, and the Wordpress "YouTube Video Importer" plugin.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = [
        unescapeHTML(match.group('url'))
        for match in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(unescapeHTML(lazy_id) for lazy_id in lazy_ids)

    # Wordpress "YouTube Video Importer" plugin
    plugin_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields one tuple per match; the video id is the last group.
    entries.extend(groups[-1] for groups in plugin_matches)

    return entries
2016
@staticmethod
def _extract_url(webpage):
    """Return the first result of ``_extract_urls``, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
2021
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return group 2 of ``cls._VALID_URL`` matched against *url*.

    Raises ExtractorError when *url* does not match.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
2029
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar in *data*.

    Start times are read from ``timeRangeStartMillis`` (scaled by 1000)
    and titles from ``title.simpleText`` of each ``chapterRenderer``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_of(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_of,
        chapter_title=title_of,
        duration=duration)
2044
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists in the engagement panels.

    Returns the chapters of the first list that produces any, else ``[]``.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        # Stop at the first panel that yields a non-empty result.
        if chapters:
            return chapters
    return []
2060
2061 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2062 chapters = []
7c365c21 2063 last_chapter = {'start_time': 0}
2064 for idx, chapter in enumerate(chapter_list or []):
2065 title = chapter_title(chapter)
84213ea8
S
2066 start_time = chapter_time(chapter)
2067 if start_time is None:
2068 continue
7c365c21 2069 last_chapter['end_time'] = start_time
2070 if start_time < last_chapter['start_time']:
2071 if idx == 1:
2072 chapters.pop()
2073 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2074 else:
2075 self.report_warning(f'Invalid start time for chapter "{title}"')
2076 continue
2077 last_chapter = {'start_time': start_time, 'title': title}
2078 chapters.append(last_chapter)
2079 last_chapter['end_time'] = duration
84213ea8
S
2080 return chapters
2081
545cc85d 2082 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2083 return self._parse_json(self._search_regex(
2084 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2085 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 2086
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns None when the text has fewer than three space-separated parts
    or when the date parser raises ValueError.
    """
    parts = time_text.split(' ')
    if len(parts) < 3:
        return None
    amount, unit = parts[0], parts[1]
    try:
        return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
    except ValueError:
        return None
d92f5d5a 2099
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no ``commentId``. ``timestamp`` is
    None when the published time text cannot be parsed. ``parent`` is the
    id of the parent comment, or 'root' when none is given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    time_text_dt = self.parse_time_text(time_text)
    # Must be bound even when parsing fails, otherwise building the result
    # below raises UnboundLocalError.
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2137
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """Generate comments (and their replies) by following continuations.

    Yields comment dicts as built by ``_extract_comment``. While handling
    the first continuation of the root section it may also yield one int:
    the estimated total number of comments.

    parent         -- id of the parent comment when fetching replies
    comment_counts -- shared list [comments so far, estimated total,
                      current thread number]; created when not given
    """

    def extract_header(contents):
        # Read the estimated comment count and pick the continuation for
        # the requested sort order from the comments header.
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        # Yield each comment in `contents`, followed by its replies.
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = try_get(
                comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                content, (lambda x: x['commentRenderer'], dict))

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # Count from 1 so that `count == 0` really means that no
                # entry was yielded (a 0-based index stays 0 after exactly
                # one entry).
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items), 1):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                # Counted from 1 for the same reason as above.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {}), 1):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2308
@staticmethod
def _generate_comment_continuation(video_id):
    """
    Generates initial comment section continuation token from given video id

    The token is the base64 encoding of a fixed binary template into which
    the UTF-8 encoded video id is inserted twice.

    @param video_id  The video id (str)
    @returns         The continuation token (str)
    """
    vid_bytes = video_id.encode('utf-8')
    # The constant chunks of the template are raw bytes, kept base64-encoded here
    prefix, middle, suffix = (
        base64.b64decode(chunk)
        for chunk in ('Eg0SCw==', 'GAYyJyIRIgs=', 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u'))
    # Concatenating the bytes directly gives the same result as converting every
    # chunk to a list of ints and back, without the intermediate conversions
    token = b''.join((prefix, vid_bytes, middle, vid_bytes, suffix))
    return base64.b64encode(token).decode('utf-8')
2319
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Collects the comments produced by _comment_entries for the known renderers
    found in `contents`, stopping once the 'max_comments' extractor argument is
    reached or the user interrupts with Ctrl-C.

    @returns  dict with the collected 'comments' and their 'comment_count'
    """
    renderer_keys = ('itemSectionRenderer',)

    def iter_comment_items(sections):
        # Anything but a list has no entries to walk through
        if not isinstance(sections, list):
            return
        for section in sections:
            for name, renderer in section.items():
                if name in renderer_keys:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    # Only the first known renderer of each section is used
                    break

    # An unset/invalid limit means "no limit"
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
    collected, expected_total = [], 0

    try:
        for item in iter_comment_items(contents):
            if len(collected) >= limit:
                break
            if isinstance(item, int):
                # ints are not comments; they carry the estimated total
                expected_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        self.to_screen('Interrupted by user')

    self.to_screen('Downloaded %d/%d comments' % (len(collected), expected_total))
    return {'comments': collected, 'comment_count': len(collected)}
2353
@staticmethod
def _generate_player_context(sts=None):
    """
    Build the playback-context part of a player request

    @param sts  Signature timestamp; only included when it is not None
    @returns    dict with the 'playbackContext' and the content/racy check flags
    """
    timestamp_part = {} if sts is None else {'signatureTimestamp': sts}
    return {
        'playbackContext': {
            'contentPlaybackContext': {
                'html5Preference': 'HTML5_PREF_WANTS',
                **timestamp_part,
            },
        },
        'contentCheckOk': True,
        'racyCheckOk': True,
    }
2368
@staticmethod
def _get_video_info_params(video_id, client='TVHTML5'):
    """
    Build the query parameters for a get_video_info request

    @param video_id  Id of the video
    @param client    Name of the client to identify as: 'ANDROID', 'TVHTML5'
                     or 'IOS'. Any other value falls back to 'TVHTML5'
    @returns         dict of query parameters
    """
    GVI_CLIENTS = {
        'ANDROID': {
            'c': 'ANDROID',
            'cver': '16.20',
        },
        'TVHTML5': {
            'c': 'TVHTML5',
            'cver': '6.20180913',
        },
        'IOS': {
            'c': 'IOS',
            'cver': '16.20'
        }
    }
    query = {
        'video_id': video_id,
        'eurl': 'https://youtube.googleapis.com/v/' + video_id,
        'html5': '1'
    }
    # dict.update(None) raises TypeError, so an unknown client must not be
    # passed through as None; use the default client's parameters instead
    query.update(GVI_CLIENTS.get(client) or GVI_CLIENTS['TVHTML5'])
    return query
4e6767b5 2392
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
    """
    Request the 'player' API endpoint on behalf of the given client

    @param client  Key into self._YT_CLIENTS selecting the client to use
    @returns       The response of _extract_response, or None if it is falsy
                   (the request is made with fatal=False)
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    account_syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    signature_timestamp = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    client_config = self._YT_CLIENTS[client]
    api_headers = self.generate_api_headers(
        player_ytcfg, identity_token, account_syncid,
        default_client=client_config, session_index=session_index)

    request_body = {'videoId': video_id, **self._generate_player_context(signature_timestamp)}
    # Internal client names look like "_web_embedded"; make them readable for the user
    display_name = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=request_body,
        ytcfg=player_ytcfg, headers=api_headers, fatal=False,
        default_client=client_config,
        note='Downloading %s player API JSON' % display_name)
    return response or None
2410
def _extract_age_gated_player_response(self, client, video_id, ytcfg, identity_token, player_url, initial_pr):
    """
    Try to get a player response for an age-gated video through the embedded
    variant of the given client

    @returns  The player response, or None when the client has no embedded
              variant or the embed config itself reports an age-gate reason
    """
    # get_video_info endpoint seems to be completely dead
    gvi_client = None  # self._YT_CLIENTS.get(f'_{client}_agegate')
    if gvi_client:
        info_webpage = self._download_webpage(
            self.http_scheme() + '//www.youtube.com/get_video_info', video_id,
            'Refetching age-gated %s info webpage' % gvi_client.lower(),
            'unable to download video info webpage', fatal=False,
            query=self._get_video_info_params(video_id, client=gvi_client))
        pr_json = traverse_obj(
            compat_parse_qs(info_webpage), ('player_response', 0), expected_type=str)
        pr = self._parse_json(pr_json or '{}', video_id)
        if pr:
            return pr
        self.report_warning('Falling back to embedded-only age-gate workaround')

    embedded_client = f'_{client}_embedded'
    if not self._YT_CLIENTS.get(embedded_client):
        return

    is_web = client == 'web'
    embed_webpage = None
    if is_web and 'configs' not in self._configuration_arg('player_skip'):
        embed_webpage = self._download_webpage(
            'https://www.youtube.com/embed/%s?html5=1' % video_id,
            video_id=video_id, note=f'Downloading age-gated {client} embed config')

    ytcfg_age = self.extract_ytcfg(video_id, embed_webpage) or {}
    # If we extracted the embed webpage, it'll tell us if we can view the video
    embedded_pr_json = traverse_obj(
        ytcfg_age, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str)
    embedded_pr = self._parse_json(embedded_pr_json or '{}', video_id=video_id)
    embedded_reason = traverse_obj(
        embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
    if embedded_reason in self._AGE_GATE_REASONS:
        return

    return self._extract_player_response(
        embedded_client, video_id,
        ytcfg_age or ytcfg, ytcfg_age if is_web else {},
        identity_token, player_url, initial_pr)
545cc85d 2446
def _get_requested_clients(self, url, smuggled_data):
    """
    Determine which player clients should be queried

    The clients come from the 'player_client' extractor argument ('all' selects
    every public client); unsupported names are skipped with a warning and
    ['android', 'web'] is used when nothing valid was requested. For music URLs
    the "_music" variant of each requested client is added as well.

    @returns  The client names with duplicates removed (by orderedSet)
    """
    # Clients whose name starts with "_" are internal and cannot be requested
    allowed_clients = [client for client in self._YT_CLIENTS if client[:1] != '_']

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = ['android', 'web']

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # The additions are built as a separate list so that requested_clients is
        # not extended while it is being iterated. Only existing clients are added
        # since the names are later used to index self._YT_CLIENTS
        music_clients = [
            f'{client}_music' for client in requested_clients
            if not client.endswith('_music') and f'{client}_music' in allowed_clients]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 2465
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
    """
    Yield the player responses of all the given clients

    For the 'web' client, the initial player response embedded in the webpage is
    used when available instead of calling the API. Whenever a response carries
    an age-gate reason, the age-gate workaround is tried for that client and its
    result (if any) is yielded too.
    """
    initial_pr = None if not webpage else self._extract_yt_initial_variable(
        webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
        video_id, 'initial player response')

    for client in clients:
        is_web = client == 'web'
        player_ytcfg = master_ytcfg if is_web else {}
        if is_web and initial_pr:
            response = initial_pr
        else:
            if client == 'web_music' and 'configs' not in self._configuration_arg('player_skip'):
                music_webpage = self._download_webpage(
                    'https://music.youtube.com',
                    video_id, fatal=False, note='Downloading remix client config')
                player_ytcfg = self.extract_ytcfg(video_id, music_webpage) or {}
            response = self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                identity_token, player_url, initial_pr)
        if response:
            yield response

        reason = traverse_obj(response, ('playabilityStatus', 'reason'))
        if reason in self._AGE_GATE_REASONS:
            age_gated_response = self._extract_age_gated_player_response(
                client, video_id, player_ytcfg or master_ytcfg,
                identity_token, player_url, initial_pr)
            if age_gated_response:
                yield age_gated_response

    if not initial_pr or 'web' in clients:
        return
    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    initial_pr['streamingData'] = None
    yield initial_pr
2499
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """
    Yield format dicts built from the given streamingData objects

    The formats listed under 'formats'/'adaptiveFormats' are yielded first,
    then those of the HLS and DASH manifests, unless these are disabled by the
    'skip' extractor argument or the youtube_include_*_manifest params.
    Directly listed formats are de-duplicated by itag + audio track id and
    manifest formats by itag.

    @param streaming_data  list of streamingData dicts
    @param video_id        Id of the video
    @param player_url      URL of the player used to decrypt signatures; formats
                           that need decryption are skipped when it is not set
    @param is_live         Whether the video is live; DASH manifests are not
                           extracted in that case
    """
    itags, stream_ids = [], []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # audioQuality may be missing or None
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            # Remembered so that the quality of manifest formats can be guessed later
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': ', '.join(filter(None, (
                audio_track.get('displayName'),
                # quality is None when the format has neither "quality" nor "audioQuality"
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', '')))),
            'fps': int_or_none(fmt.get('fps')),
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': (audio_track.get('id') or '').split('.')[0],
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is that of the only stream
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    def guess_quality(f):
        # Use the quality seen for the same itag, or else for the same height
        for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
            if val in qdict:
                return q(qdict[val])
        return -1

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag in itags:
                    continue
                if itag:
                    f['format_id'] = itag
                    itags.append(itag)
                f['quality'] = guess_quality(f)
                yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag:
                    itags.append(itag)
                f['quality'] = guess_quality(f)
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                yield f
2634
2635 def _real_extract(self, url):
2636 url, smuggled_data = unsmuggle_url(url, {})
2637 video_id = self._match_id(url)
2638
2639 base_url = self.http_scheme() + '//www.youtube.com/'
2640 webpage_url = base_url + 'watch?v=' + video_id
2641 webpage = self._download_webpage(
2642 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2643
2644 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2645 player_url = self._extract_player_url(master_ytcfg, webpage)
2646 identity_token = self._extract_identity_token(webpage, video_id)
2647
2648 player_responses = list(self._extract_player_responses(
2649 self._get_requested_clients(url, smuggled_data),
2650 video_id, webpage, master_ytcfg, player_url, identity_token))
2651
352d63fd 2652 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
11f9be09 2653
2654 playability_statuses = traverse_obj(
2655 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2656
2657 trailer_video_id = get_first(
2658 playability_statuses,
2659 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2660 expected_type=str)
2661 if trailer_video_id:
2662 return self.url_result(
2663 trailer_video_id, self.ie_key(), trailer_video_id)
2664
2665 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2666 if webpage else (lambda x: None))
2667
2668 video_details = traverse_obj(
2669 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2670 microformats = traverse_obj(
2671 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2672 expected_type=dict, default=[])
2673 video_title = (
2674 get_first(video_details, 'title')
2675 or self._get_text(microformats, (..., 'title'))
2676 or search_meta(['og:title', 'twitter:title', 'title']))
2677 video_description = get_first(video_details, 'shortDescription')
2678
2679 if not smuggled_data.get('force_singlefeed', False):
2680 if not self.get_param('noplaylist'):
2681 multifeed_metadata_list = get_first(
2682 player_responses,
2683 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2684 expected_type=str)
2685 if multifeed_metadata_list:
2686 entries = []
2687 feed_ids = []
2688 for feed in multifeed_metadata_list.split(','):
2689 # Unquote should take place before split on comma (,) since textual
2690 # fields may contain comma as well (see
2691 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2692 feed_data = compat_parse_qs(
2693 compat_urllib_parse_unquote_plus(feed))
2694
2695 def feed_entry(name):
2696 return try_get(
2697 feed_data, lambda x: x[name][0], compat_str)
2698
2699 feed_id = feed_entry('id')
2700 if not feed_id:
2701 continue
2702 feed_title = feed_entry('title')
2703 title = video_title
2704 if feed_title:
2705 title += ' (%s)' % feed_title
2706 entries.append({
2707 '_type': 'url_transparent',
2708 'ie_key': 'Youtube',
2709 'url': smuggle_url(
2710 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2711 {'force_singlefeed': True}),
2712 'title': title,
2713 })
2714 feed_ids.append(feed_id)
2715 self.to_screen(
2716 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2717 % (', '.join(feed_ids), video_id))
2718 return self.playlist_result(
2719 entries, video_id, video_title, video_description)
2720 else:
2721 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2722
7ea65411 2723 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
11f9be09 2724 is_live = get_first(video_details, 'isLive')
7ea65411 2725 if is_live is None:
2726 is_live = get_first(live_broadcast_details, 'isLiveNow')
11f9be09 2727
2728 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2729 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
bf1317d2 2730
545cc85d 2731 if not formats:
11f9be09 2732 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
b7da73eb 2733 self.raise_no_formats(
545cc85d 2734 'This video is DRM protected.', expected=True)
11f9be09 2735 pemr = get_first(
2736 playability_statuses,
2737 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2738 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2739 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 2740 if subreason:
545cc85d 2741 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 2742 countries = get_first(microformats, 'availableCountries')
545cc85d 2743 if not countries:
2744 regions_allowed = search_meta('regionsAllowed')
2745 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2746 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 2747 reason += f'. {subreason}'
545cc85d 2748 if reason:
b7da73eb 2749 self.raise_no_formats(reason, expected=True)
bf1317d2 2750
11f9be09 2751 for f in formats:
2a9c6dcd 2752 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
11f9be09 2753 f['source_preference'] = -10
2a9c6dcd 2754 note = f.get('format_note')
2755 f['format_note'] = f'{note} (throttled)' if note else '(throttled)'
11f9be09 2756
2a9c6dcd 2757 # Source is given priority since formats that throttle are given lower source_preference
2758 # When throttling issue is fully fixed, remove this
2759 self._sort_formats(formats, ('quality', 'height', 'fps', 'source'))
bf1317d2 2760
11f9be09 2761 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 2762 if not keywords and webpage:
2763 keywords = [
2764 unescapeHTML(m.group('content'))
2765 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2766 for keyword in keywords:
2767 if keyword.startswith('yt:stretch='):
201c1459 2768 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2769 if mobj:
2770 # NB: float is intentional for forcing float division
2771 w, h = (float(v) for v in mobj.groups())
2772 if w > 0 and h > 0:
2773 ratio = w / h
2774 for f in formats:
2775 if f.get('vcodec') != 'none':
2776 f['stretched_ratio'] = ratio
2777 break
6449cd80 2778
545cc85d 2779 thumbnails = []
11f9be09 2780 thumbnail_dicts = traverse_obj(
2781 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2782 expected_type=dict, default=[])
2783 for thumbnail in thumbnail_dicts:
2784 thumbnail_url = thumbnail.get('url')
2785 if not thumbnail_url:
2786 continue
2787 # Sometimes youtube gives a wrong thumbnail URL. See:
2788 # https://github.com/yt-dlp/yt-dlp/issues/233
2789 # https://github.com/ytdl-org/youtube-dl/issues/28023
2790 if 'maxresdefault' in thumbnail_url:
2791 thumbnail_url = thumbnail_url.split('?')[0]
2792 thumbnails.append({
2793 'url': thumbnail_url,
2794 'height': int_or_none(thumbnail.get('height')),
2795 'width': int_or_none(thumbnail.get('width')),
2796 })
ff2751ac 2797 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2798 if thumbnail_url:
2799 thumbnails.append({
2800 'url': thumbnail_url,
ff2751ac 2801 })
0ba692ac 2802 # The best resolution thumbnails sometimes does not appear in the webpage
2803 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 2804 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2805 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
245524e6 2806 # TODO: Test them also? - For some videos, even these don't exist
cca80fe6 2807 guaranteed_thumbnail_names = [
2808 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2809 'mqdefault', 'mq1', 'mq2', 'mq3',
2810 'default', '1', '2', '3'
2811 ]
2812 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2813 n_thumbnail_names = len(thumbnail_names)
2814
0ba692ac 2815 thumbnails.extend({
2816 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2817 video_id=video_id, name=name, ext=ext,
2818 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 2819 '_test_url': name in hq_thumbnail_names,
2820 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 2821 for thumb in thumbnails:
cca80fe6 2822 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 2823 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 2824 self._remove_duplicate_formats(thumbnails)
545cc85d 2825
7ea65411 2826 category = get_first(microformats, 'category') or search_meta('genre')
2827 channel_id = str_or_none(
2828 get_first(video_details, 'channelId')
2829 or get_first(microformats, 'externalChannelId')
2830 or search_meta('channelId'))
2831 duration = int_or_none(
2832 get_first(video_details, 'lengthSeconds')
2833 or get_first(microformats, 'lengthSeconds')
2834 or parse_duration(search_meta('duration'))) or None
2835 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2836
2837 live_content = get_first(video_details, 'isLiveContent')
2838 is_upcoming = get_first(video_details, 'isUpcoming')
2839 if is_live is None:
2840 if is_upcoming or live_content is False:
2841 is_live = False
2842 if is_upcoming is None and (live_content or is_live):
2843 is_upcoming = False
2844 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2845 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2846 if not duration and live_endtime and live_starttime:
2847 duration = live_endtime - live_starttime
2848
545cc85d 2849 info = {
2850 'id': video_id,
2851 'title': self._live_title(video_title) if is_live else video_title,
2852 'formats': formats,
2853 'thumbnails': thumbnails,
2854 'description': video_description,
2855 'upload_date': unified_strdate(
11f9be09 2856 get_first(microformats, 'uploadDate')
545cc85d 2857 or search_meta('uploadDate')),
11f9be09 2858 'uploader': get_first(video_details, 'author'),
545cc85d 2859 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2860 'uploader_url': owner_profile_url,
2861 'channel_id': channel_id,
11f9be09 2862 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
545cc85d 2863 'duration': duration,
2864 'view_count': int_or_none(
11f9be09 2865 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 2866 or search_meta('interactionCount')),
11f9be09 2867 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 2868 'age_limit': 18 if (
11f9be09 2869 get_first(microformats, 'isFamilySafe') is False
545cc85d 2870 or search_meta('isFamilyFriendly') == 'false'
2871 or search_meta('og:restrictions:age') == '18+') else 0,
2872 'webpage_url': webpage_url,
2873 'categories': [category] if category else None,
2874 'tags': keywords,
11f9be09 2875 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 2876 'is_live': is_live,
2877 'was_live': (False if is_live or is_upcoming or live_content is False
2878 else None if is_live is None or is_upcoming is None
2879 else live_content),
2880 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2881 'release_timestamp': live_starttime,
545cc85d 2882 }
b477fc13 2883
3944e7af 2884 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2885 # Converted into dicts to remove duplicates
2886 captions = {
2887 sub.get('baseUrl'): sub
2888 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2889 translation_languages = {
2890 lang.get('languageCode'): lang.get('languageName')
2891 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
545cc85d 2892 subtitles = {}
2893 if pctr:
774d79cc 2894 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2895 lang_subs = container.setdefault(lang_code, [])
545cc85d 2896 for fmt in self._SUBTITLE_FORMATS:
2897 query.update({
2898 'fmt': fmt,
2899 })
2900 lang_subs.append({
2901 'ext': fmt,
2902 'url': update_url_query(base_url, query),
774d79cc 2903 'name': sub_name,
545cc85d 2904 })
7e72694b 2905
3944e7af 2906 for base_url, caption_track in captions.items():
545cc85d 2907 if not base_url:
2908 continue
2909 if caption_track.get('kind') != 'asr':
120916da 2910 lang_code = (
2911 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2912 or caption_track.get('languageCode'))
545cc85d 2913 if not lang_code:
2914 continue
2915 process_language(
774d79cc 2916 subtitles, base_url, lang_code,
3944e7af 2917 traverse_obj(caption_track, ('name', 'simpleText')),
774d79cc 2918 {})
545cc85d 2919 continue
2920 automatic_captions = {}
3944e7af 2921 for trans_code, trans_name in translation_languages.items():
2922 if not trans_code:
545cc85d 2923 continue
2924 process_language(
3944e7af 2925 automatic_captions, base_url, trans_code,
2926 self._get_text(trans_name, max_runs=1),
2927 {'tlang': trans_code})
545cc85d 2928 info['automatic_captions'] = automatic_captions
2929 info['subtitles'] = subtitles
7e72694b 2930
545cc85d 2931 parsed_url = compat_urllib_parse_urlparse(url)
2932 for component in [parsed_url.fragment, parsed_url.query]:
2933 query = compat_parse_qs(component)
2934 for k, v in query.items():
2935 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2936 d_k += '_time'
2937 if d_k not in info and k in s_ks:
2938 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2939
2940 # Youtube Music Auto-generated description
822b9d9c 2941 if video_description:
38d70284 2942 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2943 if mobj:
822b9d9c
RA
2944 release_year = mobj.group('release_year')
2945 release_date = mobj.group('release_date')
2946 if release_date:
2947 release_date = release_date.replace('-', '')
2948 if not release_year:
545cc85d 2949 release_year = release_date[:4]
2950 info.update({
2951 'album': mobj.group('album'.strip()),
2952 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2953 'track': mobj.group('track').strip(),
2954 'release_date': release_date,
cc2db878 2955 'release_year': int_or_none(release_year),
545cc85d 2956 })
7e72694b 2957
545cc85d 2958 initial_data = None
2959 if webpage:
2960 initial_data = self._extract_yt_initial_variable(
2961 webpage, self._YT_INITIAL_DATA_RE, video_id,
2962 'yt initial data')
2963 if not initial_data:
11f9be09 2964 headers = self.generate_api_headers(
2965 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
2966 session_index=self._extract_session_index(master_ytcfg))
2967
109dd3b2 2968 initial_data = self._extract_response(
2969 item_id=video_id, ep='next', fatal=False,
11f9be09 2970 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
109dd3b2 2971 note='Downloading initial data API JSON')
545cc85d 2972
c60ee3a2 2973 try:
2974 # This will error if there is no livechat
2975 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2976 info['subtitles']['live_chat'] = [{
2977 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2978 'video_id': video_id,
2979 'ext': 'json',
f6745c49 2980 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 2981 }]
2982 except (KeyError, IndexError, TypeError):
2983 pass
545cc85d 2984
2985 if initial_data:
7c365c21 2986 info['chapters'] = (
2987 self._extract_chapters_from_json(initial_data, duration)
2988 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2989 or None)
545cc85d 2990
2991 contents = try_get(
2992 initial_data,
2993 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2994 list) or []
2995 for content in contents:
2996 vpir = content.get('videoPrimaryInfoRenderer')
2997 if vpir:
2998 stl = vpir.get('superTitleLink')
2999 if stl:
fe93e2c4 3000 stl = self._get_text(stl)
545cc85d 3001 if try_get(
3002 vpir,
3003 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3004 info['location'] = stl
3005 else:
3006 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3007 if mobj:
3008 info.update({
3009 'series': mobj.group(1),
3010 'season_number': int(mobj.group(2)),
3011 'episode_number': int(mobj.group(3)),
3012 })
3013 for tlb in (try_get(
3014 vpir,
3015 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3016 list) or []):
3017 tbr = tlb.get('toggleButtonRenderer') or {}
3018 for getter, regex in [(
3019 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3020 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3021 lambda x: x['accessibility'],
3022 lambda x: x['accessibilityData']['accessibilityData'],
3023 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3024 label = (try_get(tbr, getter, dict) or {}).get('label')
3025 if label:
3026 mobj = re.match(regex, label)
3027 if mobj:
3028 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3029 break
3030 sbr_tooltip = try_get(
3031 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3032 if sbr_tooltip:
3033 like_count, dislike_count = sbr_tooltip.split(' / ')
3034 info.update({
3035 'like_count': str_to_int(like_count),
3036 'dislike_count': str_to_int(dislike_count),
3037 })
3038 vsir = content.get('videoSecondaryInfoRenderer')
3039 if vsir:
052e1350 3040 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
545cc85d 3041 rows = try_get(
3042 vsir,
3043 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3044 list) or []
3045 multiple_songs = False
3046 for row in rows:
3047 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3048 multiple_songs = True
3049 break
3050 for row in rows:
3051 mrr = row.get('metadataRowRenderer') or {}
3052 mrr_title = mrr.get('title')
3053 if not mrr_title:
3054 continue
052e1350 3055 mrr_title = self._get_text(mrr, 'title')
3056 mrr_contents_text = self._get_text(mrr, ('contents', 0))
545cc85d 3057 if mrr_title == 'License':
3058 info['license'] = mrr_contents_text
3059 elif not multiple_songs:
3060 if mrr_title == 'Album':
3061 info['album'] = mrr_contents_text
3062 elif mrr_title == 'Artist':
3063 info['artist'] = mrr_contents_text
3064 elif mrr_title == 'Song':
3065 info['track'] = mrr_contents_text
3066
3067 fallbacks = {
3068 'channel': 'uploader',
3069 'channel_id': 'uploader_id',
3070 'channel_url': 'uploader_url',
3071 }
3072 for to, frm in fallbacks.items():
3073 if not info.get(to):
3074 info[to] = info.get(frm)
3075
3076 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3077 v = info.get(s_k)
3078 if v:
3079 info[d_k] = v
b84071c0 3080
11f9be09 3081 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3082 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3083 is_membersonly = None
b28f8d24 3084 is_premium = None
c224251a
M
3085 if initial_data and is_private is not None:
3086 is_membersonly = False
b28f8d24 3087 is_premium = False
47193e02 3088 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3089 badge_labels = set()
3090 for content in contents:
3091 if not isinstance(content, dict):
3092 continue
3093 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3094 for badge_label in badge_labels:
3095 if badge_label.lower() == 'members only':
3096 is_membersonly = True
3097 elif badge_label.lower() == 'premium':
3098 is_premium = True
3099 elif badge_label.lower() == 'unlisted':
3100 is_unlisted = True
c224251a 3101
c224251a
M
3102 info['availability'] = self._availability(
3103 is_private=is_private,
b28f8d24 3104 needs_premium=is_premium,
c224251a
M
3105 needs_subscription=is_membersonly,
3106 needs_auth=info['age_limit'] >= 18,
3107 is_unlisted=None if is_private is None else is_unlisted)
3108
06167fbb 3109 # get xsrf for annotations or comments
a06916d9 3110 get_annotations = self.get_param('writeannotations', False)
3111 get_comments = self.get_param('getcomments', False)
06167fbb 3112 if get_annotations or get_comments:
29f7c58a 3113 xsrf_token = None
11f9be09 3114 if master_ytcfg:
3115 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
29f7c58a 3116 if not xsrf_token:
3117 xsrf_token = self._search_regex(
3118 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 3119 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 3120
3121 # annotations
06167fbb 3122 if get_annotations:
11f9be09 3123 invideo_url = get_first(
3124 player_responses,
3125 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3126 expected_type=str)
64b6a4e9 3127 if xsrf_token and invideo_url:
29f7c58a 3128 xsrf_field_name = None
11f9be09 3129 if master_ytcfg:
3130 xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
29f7c58a 3131 if not xsrf_field_name:
3132 xsrf_field_name = self._search_regex(
3133 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 3134 webpage, 'xsrf field name',
29f7c58a 3135 group='xsrf_field_name', default='session_token')
8a784c74 3136 info['annotations'] = self._download_webpage(
64b6a4e9
RA
3137 self._proto_relative_url(invideo_url),
3138 video_id, note='Downloading annotations',
3139 errnote='Unable to download video annotations', fatal=False,
3140 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 3141
277d6ff5 3142 if get_comments:
11f9be09 3143 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3144
11f9be09 3145 self.mark_watched(video_id, player_responses)
d77ab8e2 3146
545cc85d 3147 return info
c5e8d7af 3148
5f6a1245 3149
8bdd16b4 3150class YoutubeTabIE(YoutubeBaseInfoExtractor):
3151 IE_DESC = 'YouTube.com tab'
70d5c17b 3152 _VALID_URL = r'''(?x)
3153 https?://
3154 (?:\w+\.)?
3155 (?:
3156 youtube(?:kids)?\.com|
3157 invidio\.us
3158 )/
3159 (?:
fe03a6cd 3160 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3161 (?P<not_channel>
9ba5705a 3162 feed/|hashtag/|
70d5c17b 3163 (?:playlist|watch)\?.*?\blist=
3164 )|
29f7c58a 3165 (?!(?:%s)\b) # Direct URLs
70d5c17b 3166 )
3167 (?P<id>[^/?\#&]+)
3168 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3169 IE_NAME = 'youtube:tab'
3170
81127aa5 3171 _TESTS = [{
da692b79 3172 'note': 'playlists, multipage',
8bdd16b4 3173 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3174 'playlist_mincount': 94,
3175 'info_dict': {
3176 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3177 'title': 'Игорь Клейнер - Playlists',
3178 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3179 'uploader': 'Игорь Клейнер',
3180 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3181 },
3182 }, {
da692b79 3183 'note': 'playlists, multipage, different order',
8bdd16b4 3184 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3185 'playlist_mincount': 94,
3186 'info_dict': {
3187 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3188 'title': 'Игорь Клейнер - Playlists',
3189 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3190 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3191 'uploader': 'Игорь Клейнер',
8bdd16b4 3192 },
201c1459 3193 }, {
da692b79 3194 'note': 'playlists, series',
201c1459 3195 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3196 'playlist_mincount': 5,
3197 'info_dict': {
3198 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3199 'title': '3Blue1Brown - Playlists',
3200 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3201 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3202 'uploader': '3Blue1Brown',
201c1459 3203 },
8bdd16b4 3204 }, {
da692b79 3205 'note': 'playlists, singlepage',
8bdd16b4 3206 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3207 'playlist_mincount': 4,
3208 'info_dict': {
3209 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3210 'title': 'ThirstForScience - Playlists',
3211 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3212 'uploader': 'ThirstForScience',
3213 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3214 }
3215 }, {
3216 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3217 'only_matching': True,
3218 }, {
da692b79 3219 'note': 'basic, single video playlist',
0e30a7b9 3220 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3221 'info_dict': {
0e30a7b9 3222 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3223 'uploader': 'Sergey M.',
3224 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3225 'title': 'youtube-dl public playlist',
81127aa5 3226 },
0e30a7b9 3227 'playlist_count': 1,
9291475f 3228 }, {
da692b79 3229 'note': 'empty playlist',
0e30a7b9 3230 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3231 'info_dict': {
0e30a7b9 3232 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3233 'uploader': 'Sergey M.',
3234 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3235 'title': 'youtube-dl empty playlist',
9291475f
PH
3236 },
3237 'playlist_count': 0,
3238 }, {
da692b79 3239 'note': 'Home tab',
8bdd16b4 3240 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3241 'info_dict': {
8bdd16b4 3242 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3243 'title': 'lex will - Home',
3244 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3245 'uploader': 'lex will',
3246 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3247 },
8bdd16b4 3248 'playlist_mincount': 2,
9291475f 3249 }, {
da692b79 3250 'note': 'Videos tab',
8bdd16b4 3251 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3252 'info_dict': {
8bdd16b4 3253 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3254 'title': 'lex will - Videos',
3255 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3256 'uploader': 'lex will',
3257 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3258 },
8bdd16b4 3259 'playlist_mincount': 975,
9291475f 3260 }, {
da692b79 3261 'note': 'Videos tab, sorted by popular',
8bdd16b4 3262 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3263 'info_dict': {
8bdd16b4 3264 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3265 'title': 'lex will - Videos',
3266 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3267 'uploader': 'lex will',
3268 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3269 },
8bdd16b4 3270 'playlist_mincount': 199,
9291475f 3271 }, {
da692b79 3272 'note': 'Playlists tab',
8bdd16b4 3273 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3274 'info_dict': {
8bdd16b4 3275 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3276 'title': 'lex will - Playlists',
3277 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3278 'uploader': 'lex will',
3279 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3280 },
8bdd16b4 3281 'playlist_mincount': 17,
ac7553d0 3282 }, {
da692b79 3283 'note': 'Community tab',
8bdd16b4 3284 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3285 'info_dict': {
8bdd16b4 3286 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3287 'title': 'lex will - Community',
3288 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3289 'uploader': 'lex will',
3290 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3291 },
3292 'playlist_mincount': 18,
87dadd45 3293 }, {
da692b79 3294 'note': 'Channels tab',
8bdd16b4 3295 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3296 'info_dict': {
8bdd16b4 3297 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3298 'title': 'lex will - Channels',
3299 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3300 'uploader': 'lex will',
3301 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3302 },
deaec5af 3303 'playlist_mincount': 12,
cd684175 3304 }, {
3305 'note': 'Search tab',
3306 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3307 'playlist_mincount': 40,
3308 'info_dict': {
3309 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3310 'title': '3Blue1Brown - Search - linear algebra',
3311 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3312 'uploader': '3Blue1Brown',
3313 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3314 },
6b08cdf6 3315 }, {
a0566bbf 3316 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3317 'only_matching': True,
3318 }, {
a0566bbf 3319 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3320 'only_matching': True,
3321 }, {
a0566bbf 3322 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3323 'only_matching': True,
3324 }, {
3325 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3326 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3327 'info_dict': {
3328 'title': '29C3: Not my department',
3329 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3330 'uploader': 'Christiaan008',
3331 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3332 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3333 },
3334 'playlist_count': 96,
3335 }, {
3336 'note': 'Large playlist',
3337 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3338 'info_dict': {
8bdd16b4 3339 'title': 'Uploads from Cauchemar',
3340 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3341 'uploader': 'Cauchemar',
3342 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3343 },
8bdd16b4 3344 'playlist_mincount': 1123,
3345 }, {
da692b79 3346 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3347 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3348 'only_matching': True,
4b7df0d3
JMF
3349 }, {
3350 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3351 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3352 'info_dict': {
acf757f4
PH
3353 'title': 'Uploads from Interstellar Movie',
3354 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3355 'uploader': 'Interstellar Movie',
8bdd16b4 3356 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3357 },
481cc733 3358 'playlist_mincount': 21,
358de58c 3359 }, {
3360 'note': 'Playlist with "show unavailable videos" button',
3361 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3362 'info_dict': {
3363 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3364 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3365 'uploader': 'Phim Siêu Nhân Nhật Bản',
3366 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3367 },
da692b79 3368 'playlist_mincount': 200,
5d342002 3369 }, {
da692b79 3370 'note': 'Playlist with unavailable videos in page 7',
5d342002 3371 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3372 'info_dict': {
3373 'title': 'Uploads from BlankTV',
3374 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3375 'uploader': 'BlankTV',
3376 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3377 },
da692b79 3378 'playlist_mincount': 1000,
8bdd16b4 3379 }, {
da692b79 3380 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3381 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3382 'info_dict': {
3383 'title': 'Data Analysis with Dr Mike Pound',
3384 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3385 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3386 'uploader': 'Computerphile',
deaec5af 3387 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3388 },
3389 'playlist_mincount': 11,
3390 }, {
a0566bbf 3391 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3392 'only_matching': True,
dacb3a86 3393 }, {
da692b79 3394 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3395 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3396 'info_dict': {
3397 'id': 'FqZTN594JQw',
3398 'ext': 'webm',
3399 'title': "Smiley's People 01 detective, Adventure Series, Action",
3400 'uploader': 'STREEM',
3401 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3402 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3403 'upload_date': '20150526',
3404 'license': 'Standard YouTube License',
3405 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3406 'categories': ['People & Blogs'],
3407 'tags': list,
dbdaaa23 3408 'view_count': int,
dacb3a86
S
3409 'like_count': int,
3410 'dislike_count': int,
3411 },
3412 'params': {
3413 'skip_download': True,
3414 },
13a75688 3415 'skip': 'This video is not available.',
dacb3a86 3416 'add_ie': [YoutubeIE.ie_key()],
481cc733 3417 }, {
8bdd16b4 3418 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3419 'only_matching': True,
66b48727 3420 }, {
8bdd16b4 3421 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3422 'only_matching': True,
a0566bbf 3423 }, {
3424 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3425 'info_dict': {
11f9be09 3426 'id': 'FMtPN8yp5LU', # This will keep changing
a0566bbf 3427 'ext': 'mp4',
deaec5af 3428 'title': compat_str,
a0566bbf 3429 'uploader': 'Sky News',
3430 'uploader_id': 'skynews',
3431 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3432 'upload_date': r're:\d{8}',
3433 'description': compat_str,
a0566bbf 3434 'categories': ['News & Politics'],
3435 'tags': list,
3436 'like_count': int,
3437 'dislike_count': int,
3438 },
3439 'params': {
3440 'skip_download': True,
3441 },
da692b79 3442 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3443 }, {
3444 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3445 'info_dict': {
3446 'id': 'a48o2S1cPoo',
3447 'ext': 'mp4',
3448 'title': 'The Young Turks - Live Main Show',
3449 'uploader': 'The Young Turks',
3450 'uploader_id': 'TheYoungTurks',
3451 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3452 'upload_date': '20150715',
3453 'license': 'Standard YouTube License',
3454 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3455 'categories': ['News & Politics'],
3456 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3457 'like_count': int,
3458 'dislike_count': int,
3459 },
3460 'params': {
3461 'skip_download': True,
3462 },
3463 'only_matching': True,
3464 }, {
3465 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3466 'only_matching': True,
3467 }, {
3468 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3469 'only_matching': True,
09f1580e 3470 }, {
3471 'note': 'A channel that is not live. Should raise error',
3472 'url': 'https://www.youtube.com/user/numberphile/live',
3473 'only_matching': True,
3d3dddc9 3474 }, {
3475 'url': 'https://www.youtube.com/feed/trending',
3476 'only_matching': True,
3477 }, {
3d3dddc9 3478 'url': 'https://www.youtube.com/feed/library',
3479 'only_matching': True,
3480 }, {
3d3dddc9 3481 'url': 'https://www.youtube.com/feed/history',
3482 'only_matching': True,
3483 }, {
3d3dddc9 3484 'url': 'https://www.youtube.com/feed/subscriptions',
3485 'only_matching': True,
3486 }, {
3d3dddc9 3487 'url': 'https://www.youtube.com/feed/watch_later',
3488 'only_matching': True,
3489 }, {
da692b79 3490 'note': 'Recommended - redirects to home page',
3d3dddc9 3491 'url': 'https://www.youtube.com/feed/recommended',
3492 'only_matching': True,
29f7c58a 3493 }, {
da692b79 3494 'note': 'inline playlist with not always working continuations',
29f7c58a 3495 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3496 'only_matching': True,
3497 }, {
3498 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3499 'only_matching': True,
3500 }, {
3501 'url': 'https://www.youtube.com/course',
3502 'only_matching': True,
3503 }, {
3504 'url': 'https://www.youtube.com/zsecurity',
3505 'only_matching': True,
3506 }, {
3507 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3508 'only_matching': True,
3509 }, {
3510 'url': 'https://www.youtube.com/TheYoungTurks/live',
3511 'only_matching': True,
39ed931e 3512 }, {
3513 'url': 'https://www.youtube.com/hashtag/cctv9',
3514 'info_dict': {
3515 'id': 'cctv9',
3516 'title': '#cctv9',
3517 },
3518 'playlist_mincount': 350,
201c1459 3519 }, {
3520 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3521 'only_matching': True,
9297939e 3522 }, {
da692b79 3523 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3524 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3525 'only_matching': True
fe03a6cd 3526 }, {
3527 'note': '/browse/ should redirect to /channel/',
3528 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3529 'only_matching': True
3530 }, {
3531 'note': 'VLPL, should redirect to playlist?list=PL...',
3532 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3533 'info_dict': {
3534 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3535 'uploader': 'NoCopyrightSounds',
3536 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3537 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3538 'title': 'NCS Releases',
3539 },
3540 'playlist_mincount': 166,
18db7548 3541 }, {
3542 'note': 'Topic, should redirect to playlist?list=UU...',
3543 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3544 'info_dict': {
3545 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3546 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3547 'title': 'Uploads from Royalty Free Music - Topic',
3548 'uploader': 'Royalty Free Music - Topic',
3549 },
3550 'expected_warnings': [
3551 'A channel/user page was given',
3552 'The URL does not have a videos tab',
3553 ],
3554 'playlist_mincount': 101,
3555 }, {
3556 'note': 'Topic without a UU playlist',
3557 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3558 'info_dict': {
3559 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3560 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3561 },
3562 'expected_warnings': [
3563 'A channel/user page was given',
3564 'The URL does not have a videos tab',
3565 'Falling back to channel URL',
3566 ],
3567 'playlist_mincount': 9,
abcdd12b 3568 }, {
3569 'note': 'Youtube music Album',
3570 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3571 'info_dict': {
3572 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3573 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3574 },
3575 'playlist_count': 50,
47193e02 3576 }, {
3577 'note': 'unlisted single video playlist',
3578 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3579 'info_dict': {
3580 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3581 'uploader': 'colethedj',
3582 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3583 'title': 'yt-dlp unlisted playlist test',
3584 'availability': 'unlisted'
3585 },
3586 'playlist_count': 1,
29f7c58a 3587 }]
3588
3589 @classmethod
3590 def suitable(cls, url):
3591 return False if YoutubeIE.suitable(url) else super(
3592 YoutubeTabIE, cls).suitable(url)
8bdd16b4 3593
3594 def _extract_channel_id(self, webpage):
3595 channel_id = self._html_search_meta(
3596 'channelId', webpage, 'channel id', default=None)
3597 if channel_id:
3598 return channel_id
3599 channel_url = self._html_search_meta(
3600 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3601 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3602 'twitter:app:url:googleplay'), webpage, 'channel url')
3603 return self._search_regex(
3604 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3605 channel_url, 'channel id')
15f6397c 3606
8bdd16b4 3607 @staticmethod
cd7c66cf 3608 def _extract_basic_item_renderer(item):
3609 # Modified from _extract_grid_item_renderer
201c1459 3610 known_basic_renderers = (
3611 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3612 )
3613 for key, renderer in item.items():
201c1459 3614 if not isinstance(renderer, dict):
cd7c66cf 3615 continue
201c1459 3616 elif key in known_basic_renderers:
3617 return renderer
3618 elif key.startswith('grid') and key.endswith('Renderer'):
3619 return renderer
8bdd16b4 3620
8bdd16b4 3621 def _grid_entries(self, grid_renderer):
3622 for item in grid_renderer['items']:
3623 if not isinstance(item, dict):
39b62db1 3624 continue
cd7c66cf 3625 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3626 if not isinstance(renderer, dict):
3627 continue
052e1350 3628 title = self._get_text(renderer, 'title')
fe93e2c4 3629
8bdd16b4 3630 # playlist
3631 playlist_id = renderer.get('playlistId')
3632 if playlist_id:
3633 yield self.url_result(
3634 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3635 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3636 video_title=title)
201c1459 3637 continue
8bdd16b4 3638 # video
3639 video_id = renderer.get('videoId')
3640 if video_id:
3641 yield self._extract_video(renderer)
201c1459 3642 continue
8bdd16b4 3643 # channel
3644 channel_id = renderer.get('channelId')
3645 if channel_id:
8bdd16b4 3646 yield self.url_result(
3647 'https://www.youtube.com/channel/%s' % channel_id,
3648 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3649 continue
3650 # generic endpoint URL support
3651 ep_url = urljoin('https://www.youtube.com/', try_get(
3652 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3653 compat_str))
3654 if ep_url:
3655 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3656 if ie.suitable(ep_url):
3657 yield self.url_result(
3658 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3659 break
8bdd16b4 3660
3d3dddc9 3661 def _shelf_entries_from_content(self, shelf_renderer):
3662 content = shelf_renderer.get('content')
3663 if not isinstance(content, dict):
8bdd16b4 3664 return
cd7c66cf 3665 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3666 if renderer:
3667 # TODO: add support for nested playlists so each shelf is processed
3668 # as separate playlist
3669 # TODO: this includes only first N items
3670 for entry in self._grid_entries(renderer):
3671 yield entry
3672 renderer = content.get('horizontalListRenderer')
3673 if renderer:
3674 # TODO
3675 pass
8bdd16b4 3676
29f7c58a 3677 def _shelf_entries(self, shelf_renderer, skip_channels=False):
8bdd16b4 3678 ep = try_get(
3679 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3680 compat_str)
3681 shelf_url = urljoin('https://www.youtube.com', ep)
3d3dddc9 3682 if shelf_url:
29f7c58a 3683 # Skipping links to another channels, note that checking for
3684 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3685 # will not work
3686 if skip_channels and '/channels?' in shelf_url:
3687 return
052e1350 3688 title = self._get_text(shelf_renderer, 'title')
3d3dddc9 3689 yield self.url_result(shelf_url, video_title=title)
3690 # Shelf may not contain shelf URL, fallback to extraction from content
3691 for entry in self._shelf_entries_from_content(shelf_renderer):
3692 yield entry
c5e8d7af 3693
8bdd16b4 3694 def _playlist_entries(self, video_list_renderer):
3695 for content in video_list_renderer['contents']:
3696 if not isinstance(content, dict):
3697 continue
3698 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3699 if not isinstance(renderer, dict):
3700 continue
3701 video_id = renderer.get('videoId')
3702 if not video_id:
3703 continue
3704 yield self._extract_video(renderer)
07aeced6 3705
3462ffa8 3706 def _rich_entries(self, rich_grid_renderer):
3707 renderer = try_get(
70d5c17b 3708 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3462ffa8 3709 video_id = renderer.get('videoId')
3710 if not video_id:
3711 return
3712 yield self._extract_video(renderer)
3713
8bdd16b4 3714 def _video_entry(self, video_renderer):
3715 video_id = video_renderer.get('videoId')
3716 if video_id:
3717 return self._extract_video(video_renderer)
dacb3a86 3718
8bdd16b4 3719 def _post_thread_entries(self, post_thread_renderer):
3720 post_renderer = try_get(
3721 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3722 if not post_renderer:
3723 return
3724 # video attachment
3725 video_renderer = try_get(
895b0931 3726 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3727 video_id = video_renderer.get('videoId')
3728 if video_id:
3729 entry = self._extract_video(video_renderer)
8bdd16b4 3730 if entry:
3731 yield entry
895b0931 3732 # playlist attachment
3733 playlist_id = try_get(
3734 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3735 if playlist_id:
3736 yield self.url_result(
e28f1c0a 3737 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3738 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3739 # inline video links
3740 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3741 for run in runs:
3742 if not isinstance(run, dict):
3743 continue
3744 ep_url = try_get(
3745 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3746 if not ep_url:
3747 continue
3748 if not YoutubeIE.suitable(ep_url):
3749 continue
3750 ep_video_id = YoutubeIE._match_id(ep_url)
3751 if video_id == ep_video_id:
3752 continue
895b0931 3753 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 3754
8bdd16b4 3755 def _post_thread_continuation_entries(self, post_thread_continuation):
3756 contents = post_thread_continuation.get('contents')
3757 if not isinstance(contents, list):
3758 return
3759 for content in contents:
3760 renderer = content.get('backstagePostThreadRenderer')
3761 if not isinstance(renderer, dict):
3762 continue
3763 for entry in self._post_thread_entries(renderer):
3764 yield entry
07aeced6 3765
39ed931e 3766 r''' # unused
3767 def _rich_grid_entries(self, contents):
3768 for content in contents:
3769 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3770 if video_renderer:
3771 entry = self._video_entry(video_renderer)
3772 if entry:
3773 yield entry
3774 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield the entries of `tab`, then follow its continuations page by page.

        The first entries come from the tab content's `sectionListRenderer`
        or `richGridRenderer`. After that, one response is requested per
        continuation until no continuation is left, a response is empty, or
        a response contains nothing this method recognises.

        `item_id` is combined with the page number and passed to
        `_extract_response` as its `item_id`; `identity_token`,
        `account_syncid` and `ytcfg` are forwarded to `generate_api_headers`
        (and `ytcfg` to `_extract_response`).
        """

        def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
            # Yields entries found under parent_renderer['contents'] and, as a
            # side channel, stores the continuation it finds in
            # continuation_list[0] of the enclosing scope.
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Not an item section: only rich items are handled here
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # Renderer key -> callable returning an iterable of entries
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    # Only the first known renderer of each item is processed
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                # Fall back to a continuation attached to the item section itself
                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            # Last resort: a continuation attached to the parent renderer
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # One-element list used as a mutable cell that extract_entries writes to
        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Keep the previous visitor data when the response carries none
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # First response shape: entries under 'continuationContents'
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                # Fresh cell, so a continuation set by extract_entries during
                # this page is told apart from a stale one
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # Second response shape: items appended through
            # onResponseReceivedActions / onResponseReceivedEndpoints.
            # Renderer key -> (handler, key the handler expects the items under)
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            # The handler is chosen from the first continuation item only
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # Wrap the whole item list in the dict shape the handler expects
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            # Nothing recognisable in this response: stop paging
            break
9558dcec 3890
@staticmethod
def _extract_selected_tab(tabs):
    """
    Return the renderer of the first tab whose 'selected' flag is True.
    Both 'tabRenderer' and 'expandableTabRenderer' entries are considered.
    Raises ExtractorError when no tab is marked as selected.
    """
    renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next(
        (candidate for candidate in renderers if candidate.get('selected') is True),
        None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected
b82f815f 3899
@classmethod
def _extract_uploader(cls, data):
    """
    Read the playlist owner from the secondary sidebar info renderer.
    Returns a dict with any of 'uploader', 'uploader_id' and 'uploader_url';
    keys whose value is None are left out. Empty dict if no owner is found.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    def browse_field(key):
        return try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint'][key], compat_str)

    found = {
        'uploader': owner.get('text'),
        'uploader_id': browse_field('browseId'),
        'uploader_url': urljoin('https://www.youtube.com/', browse_field('canonicalBaseUrl')),
    }
    return {key: value for key, value in found.items() if value is not None}
8bdd16b4 3914
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """
    Build the playlist result for a page that is organised in tabs.

    Metadata comes from the channel metadata renderer when present, otherwise
    from the playlist metadata renderer; the entries come from the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    meta_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta_renderer:
        channel_name = meta_renderer.get('title')
        channel_url = meta_renderer.get('channelUrl')
        channel_id = meta_renderer.get('externalId')
    else:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags, raw_thumbnails = [], []
    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        # Stays None for playlist pages, where no channel id was read above
        playlist_id = channel_id
        tags = meta_renderer.get('keywords', '').split()
        avatar_thumbnails = try_get(meta_renderer, lambda x: x['avatar']['thumbnails'], list)
        raw_thumbnails = avatar_thumbnails or try_get(
            self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
            lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
            list) or []

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    # Append the tab name(s) to the title
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # channel* fields mirror the uploader* fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    ytcfg = self.extract_ytcfg(item_id, webpage)
    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    entries = self._entries(selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3989
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """
    Yield the videos of a mix playlist, requesting further pages from the
    'next' endpoint until no new videos are returned or the first video
    shows up again.

    @param playlist     playlist renderer dict of the current page
    @param playlist_id  id of the mix playlist
    @param data         initial data of the page (used for the account syncid)
    @param webpage      webpage contents (used for ytcfg and identity token)
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video of the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item may not be a playlistPanelVideoRenderer; fall back to
        # an empty dict so that the defaults below are used instead of crashing
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Response carries no follow-up playlist; nothing more to extract
            return
cd7c66cf 4025
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """
    Handle a playlist that is shown next to a video (watch page).
    Mix playlists are extracted here; anything that has its own playlist URL
    is handed over to YoutubeTabIE through a url_result.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)

    # Only mix playlists have no separate URL to delegate to
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4043
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its badges and,
    for own playlists, from the selected entry of the privacy dropdown.
    Note: Nothing is assumed to be public unless YouTube says so explicitly
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # When logged in, own playlists show a visibility dropdown instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        if not try_get(
                dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if label:
            badge_labels.add(label.lower())
            break

    is_private = is_unlisted = None
    for current_label in badge_labels:
        if current_label == 'unlisted':
            is_unlisted = True
        elif current_label == 'private':
            is_private = True
        elif current_label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
4075
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` entry (of type `expected_type`)
    among the playlist sidebar items, or None if there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((candidate for candidate in candidates if candidate), None)
4084
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again with unavailable videos included.
    Returns None when the page has no primary sidebar info renderer;
    otherwise the result of the (non-fatal) API request.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    # Look for the 'show unavailable videos' menu entry and take its endpoint
    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        label = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    ytcfg = self.extract_ytcfg(item_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    # Fixed defaults are used when the menu entry did not provide the values
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4123
def _extract_webpage(self, url, item_id):
    """
    Download the webpage and its initial data, retrying while the data is incomplete.

    The number of retries is taken from the 'extractor_retries' param (default 3).
    Data counts as complete once it has 'contents' or 'currentVideoEndpoint'.
    Returns a (webpage, data) tuple.
    Raises ExtractorError when the data is still incomplete after the last retry.
    """
    retries = self.get_param('extractor_retries', 3)
    # Starts at -1 so that the first (non-retry) attempt is number 0
    count = -1
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self.extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
4145
@staticmethod
def _smuggle_data(entries, data):
    """
    Lazily pass through `entries`, smuggling `data` into each entry's URL
    whenever `data` is non-empty.
    """
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
4152
def _real_extract(self, url):
    """
    Entry point: unsmuggle the URL, note whether it is a music URL and
    pass the smuggled data on to every entry of the result.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True

    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4161
# Splits a URL into three named groups:
#   pre  - the part matched by _VALID_URL
#   tab  - an optional '/word' path segment; it is only looked for when the
#          'channel_type' group took part in the match
#          (presumably defined inside _VALID_URL - verify there)
#   post - whatever follows
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4163
def __real_extract(self, url, smuggled_data):
    """
    Resolve a tab/playlist/watch URL to a playlist result or a url_result.

    The URL is first normalised (host, tab name, music-specific rewrites and
    the redirect of channel home pages to /videos), then the webpage is
    downloaded and dispatched depending on what the initial data contains:
    tabs, a watch-page playlist, or a single video.

    @param url            URL to extract (already unsmuggled)
    @param smuggled_data  dict of smuggled data; 'is_music_url' is read here
    Raises ExtractorError when the page cannot be recognized.
    """
    item_id = self._match_id(url)
    # Always talk to www.youtube.com, whatever host the URL was given with
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # groupdict of _url_re with unmatched groups replaced by ''
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and match it again
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            # /videos was requested, but a different tab came back as selected
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Switch over to the playlist only once it loaded without an error alert
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)
    # data may have been replaced above, so look the tabs up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Neither tabs nor a playlist: fall back to a single video, if any
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4278
c5e8d7af 4279
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids as well as URLs carrying a list= parameter;
    # the actual extraction is delegated to YoutubeTabIE
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE handles or that carry a video id (v=)."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return not has_video_id and super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the input to a /playlist URL and hand it over to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one, else build it from the id
        playlist_query = parse_qs(url) or {'list': playlist_id}
        target_url = update_url_query('https://www.youtube.com/playlist', playlist_query)
        if is_music_url:
            target_url = smuggle_url(target_url, {'is_music_url': True})
        return self.url_result(target_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4364
4365
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a playlist (list=) parameter
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn the short link into the equivalent watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4404
4405
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" shortcut for a user's page
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /user/ URL of the given name."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4419
b05654f0 4420
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" style shortcut; resolved to the "LL" playlist
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the URL of the LL playlist."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
4438
4439
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to `n` videos for `query`, following continuations page by page."""
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        found = 0
        continuation = {}
        for page_num in itertools.count(1):
            request.update(continuation)
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not response:
                break
            # First page and continuation pages keep their sections in different places
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                continuation = continuation or self._extract_continuation({'contents': [section]})

                section_items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list) or []
                for section_item in section_items:
                    video = section_item.get('videoRenderer') if isinstance(section_item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    found += 1
                    if found == n:
                        return

            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query, query)
75dff0ee 4507
c9ae7b95 4508
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same as YoutubeSearchIE, but sends fixed search params with each request
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4514
c9ae7b95 4515
class YoutubeSearchURLIE(YoutubeSearchIE):
    # Handles youtube.com/results?... URLs instead of a search keyword
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return _VALID_URL itself as the URL pattern of this extractor."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query ('search_query' or 'q') and the 'sp' params from the URL and run the search."""
        url_params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = url_params.get('search_query') or url_params.get('q')
        query = search_terms[0]
        self._SEARCH_PARAMS = url_params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4542
4543
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    A subclass only has to provide the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/ URL of this feed."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4560
4561
class YoutubeWatchLaterIE(InfoExtractor):
    # ":ytwatchlater" shortcut; resolved to the "WL" playlist
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the URL of the WL playlist."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4574
4575
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com home page as well as the ":ytrec" shortcut
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4591
1ed5b5c9 4592
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # ":ytsubs" style shortcut for the subscriptions feed
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4604
1ed5b5c9 4605
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # ":ythis" / ":ythistory" shortcut for the history feed
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4614
4615
class YoutubeTruncatedURLIE(InfoExtractor):
    # Matches watch/attribution_link URLs that end before any video id and
    # answers them with an explanatory error instead of extracting anything
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an (expected) ExtractorError hinting at a missing URL quote."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
4663
4664
class YoutubeTruncatedIDIE(InfoExtractor):
    """Reject watch URLs whose ``v`` value is only 1 to 10 characters long.

    Such a URL is reported as truncated through an expected
    ``ExtractorError`` instead of being passed on for extraction.
    """

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        # Report both the partial ID and the URL it came from.
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)