]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube] Add `shorts` to `_VALID_URL`
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
2d30521a 37 float_or_none,
11f9be09 38 format_field,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
641ad5d8 41 is_html,
94278f72 42 mimetype2ext,
9c0d7f49 43 network_exceptions,
11f9be09 44 orderedSet,
6310acf5 45 parse_codecs,
49bd8c66 46 parse_count,
7c80519c 47 parse_duration,
7ea65411 48 parse_iso8601,
dca3ff4a 49 qualities,
3995d37d 50 remove_start,
cf7e015f 51 smuggle_url,
dbdaaa23 52 str_or_none,
c93d53f5 53 str_to_int,
7c365c21 54 traverse_obj,
556dbe7f 55 try_get,
c5e8d7af
PH
56 unescapeHTML,
57 unified_strdate,
cf7e015f 58 unsmuggle_url,
8bdd16b4 59 update_url_query,
21c340b8 60 url_or_none,
6e6bc8da 61 urlencode_postdata,
fe93e2c4 62 urljoin,
7c365c21 63 variadic,
c5e8d7af
PH
64)
65
5f6a1245 66
def parse_qs(url):
    """Parse the query component of *url* with compat_urlparse.parse_qs."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
69
70
000c15a4 71# any clients starting with _ cannot be explicitly requested by the user
# Per-client Innertube configuration, keyed by client name.
# Entries without 'INNERTUBE_API_KEY' / 'INNERTUBE_HOST' receive defaults
# from build_innertube_clients().
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
    },
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
}
209
210
def build_innertube_clients():
    """
    Complete INNERTUBE_CLIENTS in place.

    Every client gets a default API key, host and 'hl' value where missing,
    plus a 'priority' computed from its base client name (the part before the
    first underscore). Each base client additionally gets a deep-copied
    '<client>_agegate' variant with an EMBED client screen.
    """
    embed_context = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    primary_clients = ('android', 'web', 'ios', 'mweb')
    rank = qualities(primary_clients[::-1])

    # Iterate over a snapshot: agegate variants are inserted during the loop
    for name, config in list(INNERTUBE_CLIENTS.items()):
        config.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        config.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        config['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        base_name = name.split('_', 1)[0]
        config['priority'] = 10 * rank(base_name)

        if name in primary_clients:
            agegate_config = copy.deepcopy(config)
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_config
            agegate_config['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_config['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            agegate_config['priority'] -= 1
            continue

        if name.endswith('_embedded'):
            config['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            penalty = 2
        else:
            penalty = 3
        config['priority'] -= penalty


build_innertube_clients()
237
238
de7f3446 239class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 240 """Provide base functions for Youtube extractors"""
e00eb564 241
3462ffa8 242 _RESERVED_NAMES = (
3619f78d 243 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
244 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
245 r'browse|oembed|get_video_info|iframe_api|s/player|'
cd7c66cf 246 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 247
3619f78d 248 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
249
b2e8bc1b 250 _NETRC_MACHINE = 'youtube'
3619f78d 251
b2e8bc1b
JMF
252 # If True it will raise an error if no login info is provided
253 _LOGIN_REQUIRED = False
254
3619f78d 255 r''' # Unused since login is broken
256 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
257 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
258
259 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
260 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
261 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
262 '''
d0ba5587 263
b2e8bc1b 264 def _login(self):
83317f69 265 """
266 Attempt to log in to YouTube.
267 True is returned if successful or skipped.
268 False is returned if login failed.
269
270 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
271 """
9d5d4d64 272
273 def warn(message):
274 self.report_warning(message)
275
276 # username+password login is broken
982ee69a
MB
277 if (self._LOGIN_REQUIRED
278 and self.get_param('cookiefile') is None
279 and self.get_param('cookiesfrombrowser') is None):
9d5d4d64 280 self.raise_login_required(
281 'Login details are needed to download this content', method='cookies')
68217024 282 username, password = self._get_login_info()
9d5d4d64 283 if username:
284 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
285 return
9d5d4d64 286
2d6659b9 287 # Everything below this is broken!
288 r'''
b2e8bc1b
JMF
289 # No authentication to be performed
290 if username is None:
a06916d9 291 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 292 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 293 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 294 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 295 return True
b2e8bc1b 296
7cc3570e
PH
297 login_page = self._download_webpage(
298 self._LOGIN_URL, None,
69ea8ca4
PH
299 note='Downloading login page',
300 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
301 if login_page is False:
302 return
b2e8bc1b 303
1212e997 304 login_form = self._hidden_inputs(login_page)
c5e8d7af 305
e00eb564
S
306 def req(url, f_req, note, errnote):
307 data = login_form.copy()
308 data.update({
309 'pstMsg': 1,
310 'checkConnection': 'youtube',
311 'checkedDomains': 'youtube',
312 'hl': 'en',
313 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 314 'f.req': json.dumps(f_req),
e00eb564
S
315 'flowName': 'GlifWebSignIn',
316 'flowEntry': 'ServiceLogin',
baf67a60
S
317 # TODO: reverse actual botguard identifier generation algo
318 'bgRequest': '["identifier",""]',
041bc3ad 319 })
e00eb564
S
320 return self._download_json(
321 url, None, note=note, errnote=errnote,
322 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
323 fatal=False,
324 data=urlencode_postdata(data), headers={
325 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
326 'Google-Accounts-XSRF': 1,
327 })
328
3995d37d
S
329 lookup_req = [
330 username,
331 None, [], None, 'US', None, None, 2, False, True,
332 [
333 None, None,
334 [2, 1, None, 1,
335 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
336 None, [], 4],
337 1, [None, None, []], None, None, None, True
338 ],
339 username,
340 ]
341
e00eb564 342 lookup_results = req(
3995d37d 343 self._LOOKUP_URL, lookup_req,
e00eb564
S
344 'Looking up account info', 'Unable to look up account info')
345
346 if lookup_results is False:
347 return False
041bc3ad 348
3995d37d
S
349 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
350 if not user_hash:
351 warn('Unable to extract user hash')
352 return False
353
354 challenge_req = [
355 user_hash,
356 None, 1, None, [1, None, None, None, [password, None, True]],
357 [
358 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
359 1, [None, None, []], None, None, None, True
360 ]]
83317f69 361
3995d37d
S
362 challenge_results = req(
363 self._CHALLENGE_URL, challenge_req,
364 'Logging in', 'Unable to log in')
83317f69 365
3995d37d 366 if challenge_results is False:
e00eb564 367 return
83317f69 368
3995d37d
S
369 login_res = try_get(challenge_results, lambda x: x[0][5], list)
370 if login_res:
371 login_msg = try_get(login_res, lambda x: x[5], compat_str)
372 warn(
373 'Unable to login: %s' % 'Invalid password'
374 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
375 return False
376
377 res = try_get(challenge_results, lambda x: x[0][-1], list)
378 if not res:
379 warn('Unable to extract result entry')
380 return False
381
9a6628aa
S
382 login_challenge = try_get(res, lambda x: x[0][0], list)
383 if login_challenge:
384 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
385 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
386 # SEND_SUCCESS - TFA code has been successfully sent to phone
387 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 388 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
389 if status == 'QUOTA_EXCEEDED':
390 warn('Exceeded the limit of TFA codes, try later')
391 return False
392
393 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
394 if not tl:
395 warn('Unable to extract TL')
396 return False
397
398 tfa_code = self._get_tfa_info('2-step verification code')
399
400 if not tfa_code:
401 warn(
402 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
403 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
404 return False
405
406 tfa_code = remove_start(tfa_code, 'G-')
407
408 tfa_req = [
409 user_hash, None, 2, None,
410 [
411 9, None, None, None, None, None, None, None,
412 [None, tfa_code, True, 2]
413 ]]
414
415 tfa_results = req(
416 self._TFA_URL.format(tl), tfa_req,
417 'Submitting TFA code', 'Unable to submit TFA code')
418
419 if tfa_results is False:
420 return False
421
422 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
423 if tfa_res:
424 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
425 warn(
426 'Unable to finish TFA: %s' % 'Invalid TFA code'
427 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
428 return False
429
430 check_cookie_url = try_get(
431 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
432 else:
433 CHALLENGES = {
434 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
435 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
436 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
437 }
438 challenge = CHALLENGES.get(
439 challenge_str,
440 '%s returned error %s.' % (self.IE_NAME, challenge_str))
441 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
442 return False
3995d37d
S
443 else:
444 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
445
446 if not check_cookie_url:
447 warn('Unable to extract CheckCookie URL')
448 return False
e00eb564
S
449
450 check_cookie_results = self._download_webpage(
3995d37d
S
451 check_cookie_url, None, 'Checking cookie', fatal=False)
452
453 if check_cookie_results is False:
454 return False
e00eb564 455
3995d37d
S
456 if 'https://myaccount.google.com/' not in check_cookie_results:
457 warn('Unable to log in')
b2e8bc1b 458 return False
e00eb564 459
b2e8bc1b 460 return True
2d6659b9 461 '''
b2e8bc1b 462
cce889b9 463 def _initialize_consent(self):
464 cookies = self._get_cookies('https://www.youtube.com/')
465 if cookies.get('__Secure-3PSID'):
466 return
467 consent_id = None
468 consent = cookies.get('CONSENT')
469 if consent:
470 if 'YES' in consent.value:
471 return
472 consent_id = self._search_regex(
473 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
474 if not consent_id:
475 consent_id = random.randint(100, 999)
476 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 477
b2e8bc1b 478 def _real_initialize(self):
cce889b9 479 self._initialize_consent()
b2e8bc1b
JMF
480 if self._downloader is None:
481 return
b2e8bc1b
JMF
482 if not self._login():
483 return
c5e8d7af 484
a0566bbf 485 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 486 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
487 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 488
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for *client*."""
    client_defaults = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(client_defaults)
109dd3b2 491
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value of the INNERTUBE_CLIENTS entry for *client*."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
109dd3b2 494
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get() on *ytcfg*; when that yields a falsy value, apply the same
    getter to the default ytcfg of *default_client* instead.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
499
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from *ytcfg*, else from the defaults of *default_client*."""
    name_getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, compat_str, default_client)
109dd3b2 504
@staticmethod
def _extract_session_index(*data):
    """
    Return the first 'SESSION_INDEX' among the given ytcfgs for which
    int_or_none() gives a non-None value; None if there is no such value.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
314ee305 511
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from *ytcfg*, else from the defaults of *default_client*."""
    version_getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, compat_str, default_client)
109dd3b2 516
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from *ytcfg*, else from the defaults of *default_client*."""
    api_key_getter = lambda x: x['INNERTUBE_API_KEY']
    return self._ytcfg_get_safe(ytcfg, api_key_getter, compat_str, default_client)
519
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the Innertube context to send with API requests.

    The 'INNERTUBE_CONTEXT' dict of *ytcfg* is returned as-is when present.
    Otherwise the default context of *default_client* is used and, if a ytcfg
    was given, its client version, client name and visitor data are written
    into the 'client' section of that default context.
    """
    def context_of(config):
        return try_get(config, lambda x: x['INNERTUBE_CONTEXT'], dict)

    own_context = context_of(ytcfg)
    if own_context:
        return own_context

    fallback = context_of(self._get_default_ytcfg(default_client))
    if ytcfg:
        # Recreate the client context (required)
        client = fallback['client']
        client['clientVersion'] = self._extract_client_version(ytcfg, default_client)
        client['clientName'] = self._extract_client_name(ytcfg, default_client)
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            client['visitorData'] = visitor_data
    return fallback
539
cf87314d 540 _SAPISID = None
541
109dd3b2 542 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 543 time_now = round(time.time())
cf87314d 544 if self._SAPISID is None:
545 yt_cookies = self._get_cookies('https://www.youtube.com')
546 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
547 # See: https://github.com/yt-dlp/yt-dlp/issues/393
548 sapisid_cookie = dict_get(
549 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
550 if sapisid_cookie and sapisid_cookie.value:
551 self._SAPISID = sapisid_cookie.value
552 self.write_debug('Extracted SAPISID cookie')
553 # SAPISID cookie is required if not already present
554 if not yt_cookies.get('SAPISID'):
555 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
556 self._set_cookie(
557 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
558 else:
559 self._SAPISID = False
560 if not self._SAPISID:
561 return None
1974e99f 562 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
563 sapisidhash = hashlib.sha1(
cf87314d 564 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
1974e99f 565 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
566
567 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 568 note='Downloading API JSON', errnote='Unable to download API page',
000c15a4 569 context=None, api_key=None, api_hostname=None, default_client='web'):
f4f751af 570
109dd3b2 571 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 572 data.update(query)
11f9be09 573 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 574 real_headers.update({'content-type': 'application/json'})
575 if headers:
576 real_headers.update(headers)
545cc85d 577 return self._download_json(
109dd3b2 578 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 579 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 580 data=json.dumps(data).encode('utf8'), headers=real_headers,
581 query={'key': api_key or self._extract_api_key()})
582
def extract_yt_initial_data(self, video_id, webpage):
    """
    Locate the ytInitialData JSON in *webpage* and return it parsed.

    The boundary-anchored pattern is tried before the bare data pattern.
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE)
    raw_json = self._search_regex(patterns, webpage, 'yt initial data')
    return self._parse_json(raw_json, video_id)
0c148415 589
def _extract_identity_token(self, webpage, item_id):
    """
    Return the ID_TOKEN found in *webpage*, or None.

    The token is taken from the page's ytcfg when available; otherwise the
    raw page is searched with a regex.
    """
    if not webpage:
        return None
    ytcfg = self.extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
601
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for data in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated:
            return delegated
        datasync_id = try_get(data, datasync_getters, compat_str) or ''
        parts = datasync_id.split("||")
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
a1c5d2ca 620
def extract_ytcfg(self, video_id, webpage):
    """
    Return the dict passed to ytcfg.set(...) in *webpage*.

    An empty dict is returned for an empty page, when no ytcfg.set call is
    found, or when parsing gives a falsy result.
    """
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_ytcfg, video_id, fatal=False)
    return parsed or {}
628
def generate_api_headers(
        self, ytcfg=None, identity_token=None, account_syncid=None,
        visitor_data=None, api_hostname=None, default_client='web', session_index=None):
    """
    Build the HTTP headers for an Innertube API request.

    Client name, client version and origin are always set. The identity
    token, page id, auth user, visitor id and SAPISIDHASH authorization
    headers are added only when the corresponding value is given or can be
    derived from *ytcfg* or the cookies.
    """
    host = api_hostname or self._get_innertube_host(default_client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
    }
    if not visitor_data and ytcfg:
        context = self._extract_context(ytcfg, default_client)
        visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
    if session_index is None and ytcfg:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Without a known session index, 0 is sent
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index
    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data
    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        headers['Authorization'] = authorization
        headers['X-Origin'] = origin
    return headers
29f7c58a 657
2d6659b9 658 @staticmethod
659 def _build_api_continuation_query(continuation, ctp=None):
660 query = {
661 'continuation': continuation
662 }
663 # TODO: Inconsistency with clickTrackingParams.
664 # Currently we have a fixed ctp contained within context (from ytcfg)
665 # and a ctp in root query for continuation.
666 if ctp:
667 query['clickTracking'] = {'clickTrackingParams': ctp}
668 return query
669
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's nextContinuationData or
    reloadContinuationData; return None when neither carries a token.
    """
    continuation_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, continuation_getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
2d6659b9 682
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    Returns None when *continuation_ep* is not a dict or has no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if token:
        return cls._build_api_continuation_query(
            token, continuation_ep.get('clickTrackingParams'))
    return None
2d6659b9 692
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for *renderer*, or None.

    The renderer's own continuation data is preferred; otherwise the first
    usable continuationItemRenderer among its 'contents' and 'items' is used.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for key in ('contents', 'items'):
        for entry in try_get(renderer, lambda x: x[key], list) or []:
            if not isinstance(entry, dict):
                continue
            endpoint = try_get(entry, endpoint_getters, dict)
            query = cls._extract_continuation_ep_data(endpoint)
            if query:
                return query
    return None
713
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for every alert in data['alerts'] that has
    both a 'type' and a non-empty text.

    Entries of unexpected shape (non-dict alert containers or non-dict
    alert values) are skipped instead of raising.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A malformed (non-dict) alert has no .get(); skip it
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
726
641ad5d8 727 def _report_alerts(self, alerts, expected=True, fatal=True):
109dd3b2 728 errors = []
729 warnings = []
730 for alert_type, alert_message in alerts:
641ad5d8 731 if alert_type.lower() == 'error' and fatal:
109dd3b2 732 errors.append([alert_type, alert_message])
733 else:
734 warnings.append([alert_type, alert_message])
735
736 for alert_type, alert_message in (warnings + errors[:-1]):
737 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
738 if errors:
739 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
740
741 def _extract_and_report_alerts(self, data, *args, **kwargs):
742 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
743
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels in renderer['badges']."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
751
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths in *data*.

    With no path, *data* itself is examined. For each candidate object the
    'simpleText' string is preferred; otherwise the 'text' values of its
    'runs' (or of the object itself when it is a list) are joined, limited
    to the first *max_runs* runs when that is set.
    Returns None when nothing is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            found = traverse_obj(data, path, default=[])
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            # A non-branching path yields a single object rather than a list of them
            candidates = found if is_branching else [found]
        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text
            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            run_limit = max_runs or len(runs)
            runs = runs[:min(len(runs), run_limit)]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
47193e02 773
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the Innertube endpoint *ep* via _call_api(), retrying on failure.

    Up to 'extractor_retries' (param, default 3) retries are made when a
    network error occurs (except HTTP 403/429) or when the response has
    none of *check_get_keys*. Alerts contained in the response are reported.

    Returns the response, or None when the request ultimately failed and
    *fatal* is false. With *fatal* set, the failure is raised instead.
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            # Always fatal here so that failures surface as ExtractorError below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # A non-HTML HTTP error body may be JSON carrying YouTube's own error message
                if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                    e.cause.seek(0)
                    yt_error = try_get(
                        self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                        lambda x: x['error']['message'], compat_str)
                    if yt_error:
                        self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e)
                    if count < retries:
                        continue
            # Not retryable, or retries exhausted
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
839
@staticmethod
def is_music_url(url):
    """Return True if *url* starts with http(s)://music.youtube.com/, else False."""
    music_match = re.match(r'https?://music\.youtube\.com/', url)
    return bool(music_match)
843
def _extract_video(self, renderer):
    """
    Build a 'url'-type result dict for YoutubeIE from a video renderer.

    Title, description, duration, view count and uploader are read from the
    renderer's text fields; the video id serves as both 'id' and 'url'.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')
    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)

    # Only the leading digit/comma group of the whitespace-stripped view text is used
    views_text = self._get_text(renderer, 'viewCountText') or ''
    leading_number = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', views_text),
        'view count', default=None)
    view_count = str_to_int(leading_number)

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
868
0c148415 869
360e1ca5 870class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 871 IE_DESC = 'YouTube.com'
bc2ca1bb 872 _INVIDIOUS_SITES = (
873 # invidious-redirect websites
874 r'(?:www\.)?redirect\.invidious\.io',
875 r'(?:(?:www|dev)\.)?invidio\.us',
876 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
877 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 878 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 879 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 880 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 881 # youtube-dl invidious instances list
882 r'(?:(?:www|no)\.)?invidiou\.sh',
883 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
884 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 885 r'(?:www\.)?invidious\.mastodon\.host',
886 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 887 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 888 r'(?:www\.)?invidious\.tinfoil-hat\.net',
889 r'(?:www\.)?invidious\.himiko\.cloud',
890 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 891 r'(?:www\.)?invidious\.tube',
892 r'(?:www\.)?invidiou\.site',
893 r'(?:www\.)?invidious\.site',
894 r'(?:www\.)?invidious\.xyz',
895 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 896 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 897 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 898 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 899 r'(?:www\.)?tube\.poal\.co',
900 r'(?:www\.)?tube\.connect\.cafe',
901 r'(?:www\.)?vid\.wxzm\.sx',
902 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 903 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 904 r'(?:www\.)?yewtu\.be',
905 r'(?:www\.)?yt\.elukerio\.org',
906 r'(?:www\.)?yt\.lelux\.fi',
907 r'(?:www\.)?invidious\.ggc-project\.de',
908 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 909 r'(?:www\.)?ytprivate\.com',
910 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 911 r'(?:www\.)?invidious\.toot\.koeln',
912 r'(?:www\.)?invidious\.fdn\.fr',
913 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 914 r'(?:www\.)?invidious\.namazso\.eu',
915 r'(?:www\.)?invidious\.silkky\.cloud',
916 r'(?:www\.)?invidious\.exonip\.de',
917 r'(?:www\.)?invidious\.riverside\.rocks',
918 r'(?:www\.)?invidious\.blamefran\.net',
919 r'(?:www\.)?invidious\.moomoo\.de',
920 r'(?:www\.)?ytb\.trom\.tf',
921 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 922 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
923 r'(?:www\.)?qklhadlycap4cnod\.onion',
924 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
925 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
926 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
927 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
928 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
929 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 930 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
931 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
932 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
933 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 934 )
cb7dfeea 935 _VALID_URL = r"""(?x)^
c5e8d7af 936 (
edb53e2d 937 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 938 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
939 (?:www\.)?deturl\.com/www\.youtube\.com|
940 (?:www\.)?pwnyoutube\.com|
941 (?:www\.)?hooktube\.com|
942 (?:www\.)?yourepeat\.com|
943 tube\.majestyc\.net|
944 %(invidious)s|
945 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
946 (?:.*?\#/)? # handle anchor (#/) redirect urls
947 (?: # the various things that can precede the ID:
8fc54b12 948 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
c5e8d7af 949 |(?: # or the v= param in all its forms
f7000f3a 950 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 951 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 952 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
953 v=
954 )
f4b05232 955 ))
cbaed4bb
S
956 |(?:
957 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
958 vid\.plus| # or vid.plus/xxxx
959 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 960 %(invidious)s
cbaed4bb 961 )/
edb53e2d 962 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 963 )
c5e8d7af 964 )? # all until now is optional -> you can pass the naked ID
201c1459 965 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 966 (?(1).+)? # if we found the ID, everything can follow
9297939e 967 (?:\#|$)""" % {
bc2ca1bb 968 'invidious': '|'.join(_INVIDIOUS_SITES),
969 }
e40c758c 970 _PLAYER_INFO_RE = (
cc2db878 971 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
972 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 973 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 974 )
2c62dc26 975 _formats = {
c2d3cb4c 976 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
977 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
978 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
979 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
980 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
981 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
982 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
983 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 984 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 985 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
986 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
987 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
988 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
989 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
990 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 991 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 992 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
993 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 994
995
996 # 3D videos
c2d3cb4c 997 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
998 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
999 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1000 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 1001 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1002 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1003 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 1004
96fb5605 1005 # Apple HTTP Live Streaming
11f12195 1006 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 1007 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1008 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1009 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1010 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1011 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 1012 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1013 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
1014
1015 # DASH mp4 video
d23028a8
S
1016 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1017 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1018 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1019 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1020 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 1021 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
1022 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1023 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1024 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1025 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1026 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1027 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1028
f6f1fc92 1029 # Dash mp4 audio
d23028a8
S
1030 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1031 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1032 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1033 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1034 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1035 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1036 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1037
1038 # Dash webm
d23028a8
S
1039 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1040 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1041 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1042 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1043 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1044 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1045 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1046 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1047 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1048 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1049 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1050 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1051 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1052 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1053 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1054 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1055 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1056 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1057 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1058 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1059 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1060 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1061
1062 # Dash webm audio
d23028a8
S
1063 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1064 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1065
0857baad 1066 # Dash webm audio with opus inside
d23028a8
S
1067 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1068 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1069 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1070
ce6b9a2d
PH
1071 # RTMP (unnamed)
1072 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1073
1074 # av01 video only formats sometimes served with "unknown" codecs
1075 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1076 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1077 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1078 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 1079 }
29f7c58a 1080 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1081
fd5c4aab
S
1082 _GEO_BYPASS = False
1083
78caa52a 1084 IE_NAME = 'youtube'
2eb88d95
PH
1085 _TESTS = [
1086 {
2d3d2997 1087 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1088 'info_dict': {
1089 'id': 'BaW_jenozKc',
1090 'ext': 'mp4',
3867038a 1091 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1092 'uploader': 'Philipp Hagemeister',
1093 'uploader_id': 'phihag',
ec85ded8 1094 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
1095 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1096 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1097 'upload_date': '20121002',
3867038a 1098 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 1099 'categories': ['Science & Technology'],
3867038a 1100 'tags': ['youtube-dl'],
556dbe7f 1101 'duration': 10,
dbdaaa23 1102 'view_count': int,
3e7c1224
PH
1103 'like_count': int,
1104 'dislike_count': int,
7c80519c 1105 'start_time': 1,
297a564b 1106 'end_time': 9,
2eb88d95 1107 }
0e853ca4 1108 },
fccd3771 1109 {
4bc3a23e
PH
1110 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1111 'note': 'Embed-only video (#1746)',
1112 'info_dict': {
1113 'id': 'yZIXLfi8CZQ',
1114 'ext': 'mp4',
1115 'upload_date': '20120608',
1116 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1117 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1118 'uploader': 'SET India',
94bfcd23 1119 'uploader_id': 'setindia',
ec85ded8 1120 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1121 'age_limit': 18,
545cc85d 1122 },
1123 'skip': 'Private video',
fccd3771 1124 },
11b56058 1125 {
8bdd16b4 1126 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1127 'note': 'Use the first video ID in the URL',
1128 'info_dict': {
1129 'id': 'BaW_jenozKc',
1130 'ext': 'mp4',
3867038a 1131 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1132 'uploader': 'Philipp Hagemeister',
1133 'uploader_id': 'phihag',
ec85ded8 1134 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1135 'upload_date': '20121002',
3867038a 1136 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1137 'categories': ['Science & Technology'],
3867038a 1138 'tags': ['youtube-dl'],
556dbe7f 1139 'duration': 10,
dbdaaa23 1140 'view_count': int,
11b56058
PM
1141 'like_count': int,
1142 'dislike_count': int,
34a7de29
S
1143 },
1144 'params': {
1145 'skip_download': True,
1146 },
11b56058 1147 },
dd27fd17 1148 {
2d3d2997 1149 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1150 'note': '256k DASH audio (format 141) via DASH manifest',
1151 'info_dict': {
1152 'id': 'a9LDPn-MO4I',
1153 'ext': 'm4a',
1154 'upload_date': '20121002',
1155 'uploader_id': '8KVIDEO',
ec85ded8 1156 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1157 'description': '',
1158 'uploader': '8KVIDEO',
1159 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1160 },
4bc3a23e
PH
1161 'params': {
1162 'youtube_include_dash_manifest': True,
1163 'format': '141',
4919603f 1164 },
de3c7fe0 1165 'skip': 'format 141 not served anymore',
dd27fd17 1166 },
8bdd16b4 1167 # DASH manifest with encrypted signature
1168 {
1169 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1170 'info_dict': {
1171 'id': 'IB3lcPjvWLA',
1172 'ext': 'm4a',
1173 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1174 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1175 'duration': 244,
1176 'uploader': 'AfrojackVEVO',
1177 'uploader_id': 'AfrojackVEVO',
1178 'upload_date': '20131011',
cc2db878 1179 'abr': 129.495,
8bdd16b4 1180 },
1181 'params': {
1182 'youtube_include_dash_manifest': True,
1183 'format': '141/bestaudio[ext=m4a]',
1184 },
1185 },
65c2fde2 1186 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1187 {
65c2fde2 1188 'note': 'Embed allowed age-gate video',
2d3d2997 1189 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1190 'info_dict': {
1191 'id': 'HtVdAasjOgU',
1192 'ext': 'mp4',
1193 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1194 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1195 'duration': 142,
c522adb1
JMF
1196 'uploader': 'The Witcher',
1197 'uploader_id': 'WitcherGame',
ec85ded8 1198 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1199 'upload_date': '20140605',
34952f09 1200 'age_limit': 18,
c522adb1
JMF
1201 },
1202 },
65c2fde2 1203 {
1204 'note': 'Age-gate video with embed allowed in public site',
1205 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1206 'info_dict': {
1207 'id': 'HsUATh_Nc2U',
1208 'ext': 'mp4',
1209 'title': 'Godzilla 2 (Official Video)',
1210 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1211 'upload_date': '20200408',
1212 'uploader_id': 'FlyingKitty900',
1213 'uploader': 'FlyingKitty',
1214 'age_limit': 18,
1215 },
1216 },
1217 {
1218 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1219 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1220 'info_dict': {
1221 'id': 'Tq92D6wQ1mg',
1222 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1223 'ext': 'mp4',
1224 'upload_date': '20191227',
65c2fde2 1225 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1226 'uploader': 'Projekt Melody',
1227 'description': 'md5:17eccca93a786d51bc67646756894066',
1228 'age_limit': 18,
1229 },
1230 },
1231 {
1232 'note': 'Non-Agegated non-embeddable video',
1233 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1234 'info_dict': {
1235 'id': 'MeJVWBSsPAY',
1236 'ext': 'mp4',
1237 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1238 'uploader': 'Herr Lurik',
1239 'uploader_id': 'st3in234',
1240 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1241 'upload_date': '20130730',
1242 },
1243 },
1244 {
1245 'note': 'Non-bypassable age-gated video',
1246 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1247 'only_matching': True,
1248 },
8bdd16b4 1249 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1250 # YouTube Red ad is not captured for creator
1251 {
1252 'url': '__2ABJjxzNo',
1253 'info_dict': {
1254 'id': '__2ABJjxzNo',
1255 'ext': 'mp4',
1256 'duration': 266,
1257 'upload_date': '20100430',
1258 'uploader_id': 'deadmau5',
1259 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1260 'creator': 'deadmau5',
1261 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1262 'uploader': 'deadmau5',
1263 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1264 'alt_title': 'Some Chords',
8bdd16b4 1265 },
1266 'expected_warnings': [
1267 'DASH manifest missing',
1268 ]
1269 },
067aa17e 1270 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1271 {
1272 'url': 'lqQg6PlCWgI',
1273 'info_dict': {
1274 'id': 'lqQg6PlCWgI',
1275 'ext': 'mp4',
556dbe7f 1276 'duration': 6085,
90227264 1277 'upload_date': '20150827',
cbe2bd91 1278 'uploader_id': 'olympic',
ec85ded8 1279 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1280 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1281 'uploader': 'Olympics',
cbe2bd91
PH
1282 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1283 },
1284 'params': {
1285 'skip_download': 'requires avconv',
e52a40ab 1286 }
cbe2bd91 1287 },
6271f1ca
PH
1288 # Non-square pixels
1289 {
1290 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1291 'info_dict': {
1292 'id': '_b-2C3KPAM0',
1293 'ext': 'mp4',
1294 'stretched_ratio': 16 / 9.,
556dbe7f 1295 'duration': 85,
6271f1ca
PH
1296 'upload_date': '20110310',
1297 'uploader_id': 'AllenMeow',
ec85ded8 1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1299 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1300 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1301 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1302 },
06b491eb
S
1303 },
1304 # url_encoded_fmt_stream_map is empty string
1305 {
1306 'url': 'qEJwOuvDf7I',
1307 'info_dict': {
1308 'id': 'qEJwOuvDf7I',
f57b7835 1309 'ext': 'webm',
06b491eb
S
1310 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1311 'description': '',
1312 'upload_date': '20150404',
1313 'uploader_id': 'spbelect',
1314 'uploader': 'Наблюдатели Петербурга',
1315 },
1316 'params': {
1317 'skip_download': 'requires avconv',
e323cf3f
S
1318 },
1319 'skip': 'This live event has ended.',
06b491eb 1320 },
067aa17e 1321 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1322 {
1323 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1324 'info_dict': {
1325 'id': 'FIl7x6_3R5Y',
eb6793ba 1326 'ext': 'webm',
da77d856
S
1327 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1328 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1329 'duration': 220,
da77d856
S
1330 'upload_date': '20150625',
1331 'uploader_id': 'dorappi2000',
ec85ded8 1332 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1333 'uploader': 'dorappi2000',
eb6793ba 1334 'formats': 'mincount:31',
da77d856 1335 },
eb6793ba 1336 'skip': 'not actual anymore',
2ee8f5d8 1337 },
8a1a26ce
YCH
1338 # DASH manifest with segment_list
1339 {
1340 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1341 'md5': '8ce563a1d667b599d21064e982ab9e31',
1342 'info_dict': {
1343 'id': 'CsmdDsKjzN8',
1344 'ext': 'mp4',
17ee98e1 1345 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1346 'uploader': 'Airtek',
1347 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1348 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1349 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1350 },
1351 'params': {
1352 'youtube_include_dash_manifest': True,
1353 'format': '135', # bestvideo
be49068d
S
1354 },
1355 'skip': 'This live event has ended.',
2ee8f5d8 1356 },
cf7e015f
S
1357 {
1358 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1359 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1360 'info_dict': {
545cc85d 1361 'id': 'jvGDaLqkpTg',
1362 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1363 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1364 },
1365 'playlist': [{
1366 'info_dict': {
545cc85d 1367 'id': 'jvGDaLqkpTg',
cf7e015f 1368 'ext': 'mp4',
545cc85d 1369 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1370 'description': 'md5:e03b909557865076822aa169218d6a5d',
1371 'duration': 10643,
1372 'upload_date': '20161111',
1373 'uploader': 'Team PGP',
1374 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1375 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1376 },
1377 }, {
1378 'info_dict': {
545cc85d 1379 'id': '3AKt1R1aDnw',
cf7e015f 1380 'ext': 'mp4',
545cc85d 1381 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1382 'description': 'md5:e03b909557865076822aa169218d6a5d',
1383 'duration': 10991,
1384 'upload_date': '20161111',
1385 'uploader': 'Team PGP',
1386 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1387 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1388 },
1389 }, {
1390 'info_dict': {
545cc85d 1391 'id': 'RtAMM00gpVc',
cf7e015f 1392 'ext': 'mp4',
545cc85d 1393 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1394 'description': 'md5:e03b909557865076822aa169218d6a5d',
1395 'duration': 10995,
1396 'upload_date': '20161111',
1397 'uploader': 'Team PGP',
1398 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1399 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1400 },
1401 }, {
1402 'info_dict': {
545cc85d 1403 'id': '6N2fdlP3C5U',
cf7e015f 1404 'ext': 'mp4',
545cc85d 1405 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1406 'description': 'md5:e03b909557865076822aa169218d6a5d',
1407 'duration': 10990,
1408 'upload_date': '20161111',
1409 'uploader': 'Team PGP',
1410 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1411 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1412 },
1413 }],
1414 'params': {
1415 'skip_download': True,
1416 },
65c2fde2 1417 'skip': 'Not multifeed anymore',
cbaed4bb 1418 },
f9f49d87 1419 {
067aa17e 1420 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1421 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1422 'info_dict': {
1423 'id': 'gVfLd0zydlo',
1424 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1425 },
1426 'playlist_count': 2,
be49068d 1427 'skip': 'Not multifeed anymore',
f9f49d87 1428 },
cbaed4bb 1429 {
2d3d2997 1430 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1431 'only_matching': True,
0e49d9a6 1432 },
6d4fc66b 1433 {
2d3d2997 1434 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1435 'only_matching': True,
1436 },
0e49d9a6 1437 {
067aa17e 1438 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1439 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1440 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1441 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1442 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1443 'info_dict': {
1444 'id': 'lsguqyKfVQg',
1445 'ext': 'mp4',
1446 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1447 'alt_title': 'Dark Walk',
0e49d9a6 1448 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1449 'duration': 133,
0e49d9a6
LL
1450 'upload_date': '20151119',
1451 'uploader_id': 'IronSoulElf',
ec85ded8 1452 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1453 'uploader': 'IronSoulElf',
11f9be09 1454 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1455 'track': 'Dark Walk',
1456 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1457 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1458 },
1459 'params': {
1460 'skip_download': True,
1461 },
1462 },
61f92af1 1463 {
067aa17e 1464 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1465 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1466 'only_matching': True,
1467 },
313dfc45
LL
1468 {
1469 # Video with yt:stretch=17:0
1470 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1471 'info_dict': {
1472 'id': 'Q39EVAstoRM',
1473 'ext': 'mp4',
1474 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1475 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1476 'upload_date': '20151107',
1477 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1478 'uploader': 'CH GAMER DROID',
1479 },
1480 'params': {
1481 'skip_download': True,
1482 },
be49068d 1483 'skip': 'This video does not exist.',
313dfc45 1484 },
201c1459 1485 {
1486 # Video with incomplete 'yt:stretch=16:'
1487 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1488 'only_matching': True,
1489 },
7caf9830
S
1490 {
1491 # Video licensed under Creative Commons
1492 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1493 'info_dict': {
1494 'id': 'M4gD1WSo5mA',
1495 'ext': 'mp4',
1496 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1497 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1498 'duration': 721,
7caf9830
S
1499 'upload_date': '20150127',
1500 'uploader_id': 'BerkmanCenter',
ec85ded8 1501 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1502 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1503 'license': 'Creative Commons Attribution license (reuse allowed)',
1504 },
1505 'params': {
1506 'skip_download': True,
1507 },
1508 },
fd050249
S
1509 {
1510 # Channel-like uploader_url
1511 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1512 'info_dict': {
1513 'id': 'eQcmzGIKrzg',
1514 'ext': 'mp4',
1515 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1516 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1517 'duration': 4060,
fd050249 1518 'upload_date': '20151119',
eb6793ba 1519 'uploader': 'Bernie Sanders',
fd050249 1520 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1521 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1522 'license': 'Creative Commons Attribution license (reuse allowed)',
1523 },
1524 'params': {
1525 'skip_download': True,
1526 },
1527 },
040ac686
S
1528 {
1529 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1530 'only_matching': True,
7f29cf54
S
1531 },
1532 {
067aa17e 1533 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1534 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1535 'only_matching': True,
6496ccb4
S
1536 },
1537 {
1538 # Rental video preview
1539 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1540 'info_dict': {
1541 'id': 'uGpuVWrhIzE',
1542 'ext': 'mp4',
1543 'title': 'Piku - Trailer',
1544 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1545 'upload_date': '20150811',
1546 'uploader': 'FlixMatrix',
1547 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1548 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1549 'license': 'Standard YouTube License',
1550 },
1551 'params': {
1552 'skip_download': True,
1553 },
eb6793ba 1554 'skip': 'This video is not available.',
022a5d66 1555 },
12afdc2a
S
1556 {
1557 # YouTube Red video with episode data
1558 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1559 'info_dict': {
1560 'id': 'iqKdEhx-dD4',
1561 'ext': 'mp4',
1562 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1563 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1564 'duration': 2085,
12afdc2a
S
1565 'upload_date': '20170118',
1566 'uploader': 'Vsauce',
1567 'uploader_id': 'Vsauce',
1568 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1569 'series': 'Mind Field',
1570 'season_number': 1,
1571 'episode_number': 1,
1572 },
1573 'params': {
1574 'skip_download': True,
1575 },
1576 'expected_warnings': [
1577 'Skipping DASH manifest',
1578 ],
1579 },
c7121fa7
S
1580 {
1581 # The following content has been identified by the YouTube community
1582 # as inappropriate or offensive to some audiences.
1583 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1584 'info_dict': {
1585 'id': '6SJNVb0GnPI',
1586 'ext': 'mp4',
1587 'title': 'Race Differences in Intelligence',
1588 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1589 'duration': 965,
1590 'upload_date': '20140124',
1591 'uploader': 'New Century Foundation',
1592 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1593 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1594 },
1595 'params': {
1596 'skip_download': True,
1597 },
545cc85d 1598 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1599 },
022a5d66
S
1600 {
1601 # itag 212
1602 'url': '1t24XAntNCY',
1603 'only_matching': True,
fd5c4aab
S
1604 },
1605 {
1606 # geo restricted to JP
1607 'url': 'sJL6WA-aGkQ',
1608 'only_matching': True,
1609 },
cd5a74a2
S
1610 {
1611 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1612 'only_matching': True,
1613 },
bc2ca1bb 1614 {
1615 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1616 'only_matching': True,
1617 },
1618 {
1619 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1620 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1621 'only_matching': True,
1622 },
825cd268
RA
1623 {
1624 # DRM protected
1625 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1626 'only_matching': True,
4fe54c12
S
1627 },
1628 {
1629 # Video with unsupported adaptive stream type formats
1630 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1631 'info_dict': {
1632 'id': 'Z4Vy8R84T1U',
1633 'ext': 'mp4',
1634 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1635 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1636 'duration': 433,
1637 'upload_date': '20130923',
1638 'uploader': 'Amelia Putri Harwita',
1639 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1640 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1641 'formats': 'maxcount:10',
1642 },
1643 'params': {
1644 'skip_download': True,
1645 'youtube_include_dash_manifest': False,
1646 },
5429d6a9 1647 'skip': 'not actual anymore',
5caabd3c 1648 },
1649 {
822b9d9c 1650 # Youtube Music Auto-generated description
5caabd3c 1651 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1652 'info_dict': {
1653 'id': 'MgNrAu2pzNs',
1654 'ext': 'mp4',
1655 'title': 'Voyeur Girl',
1656 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1657 'upload_date': '20190312',
5429d6a9
S
1658 'uploader': 'Stephen - Topic',
1659 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1660 'artist': 'Stephen',
1661 'track': 'Voyeur Girl',
1662 'album': 'it\'s too much love to know my dear',
1663 'release_date': '20190313',
1664 'release_year': 2019,
1665 },
1666 'params': {
1667 'skip_download': True,
1668 },
1669 },
66b48727
RA
1670 {
1671 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1672 'only_matching': True,
1673 },
011e75e6
S
1674 {
1675 # invalid -> valid video id redirection
1676 'url': 'DJztXj2GPfl',
1677 'info_dict': {
1678 'id': 'DJztXj2GPfk',
1679 'ext': 'mp4',
1680 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1681 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1682 'upload_date': '20090125',
1683 'uploader': 'Prochorowka',
1684 'uploader_id': 'Prochorowka',
1685 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1686 'artist': 'Panjabi MC',
1687 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1688 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1689 },
1690 'params': {
1691 'skip_download': True,
1692 },
545cc85d 1693 'skip': 'Video unavailable',
ea74e00b
DP
1694 },
1695 {
1696 # empty description results in an empty string
1697 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1698 'info_dict': {
1699 'id': 'x41yOUIvK2k',
1700 'ext': 'mp4',
1701 'title': 'IMG 3456',
1702 'description': '',
1703 'upload_date': '20170613',
1704 'uploader_id': 'ElevageOrVert',
1705 'uploader': 'ElevageOrVert',
1706 },
1707 'params': {
1708 'skip_download': True,
1709 },
1710 },
a0566bbf 1711 {
29f7c58a 1712 # with '};' inside yt initial data (see [1])
1713 # see [2] for an example with '};' inside ytInitialPlayerResponse
1714 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1715 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1716 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1717 'info_dict': {
1718 'id': 'CHqg6qOn4no',
1719 'ext': 'mp4',
1720 'title': 'Part 77 Sort a list of simple types in c#',
1721 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1722 'upload_date': '20130831',
1723 'uploader_id': 'kudvenkat',
1724 'uploader': 'kudvenkat',
1725 },
1726 'params': {
1727 'skip_download': True,
1728 },
1729 },
29f7c58a 1730 {
1731 # another example of '};' in ytInitialData
1732 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1733 'only_matching': True,
1734 },
1735 {
1736 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1737 'only_matching': True,
1738 },
545cc85d 1739 {
cc2db878 1740 # https://github.com/ytdl-org/youtube-dl/pull/28094
1741 'url': 'OtqTfy26tG0',
1742 'info_dict': {
1743 'id': 'OtqTfy26tG0',
1744 'ext': 'mp4',
1745 'title': 'Burn Out',
1746 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1747 'upload_date': '20141120',
1748 'uploader': 'The Cinematic Orchestra - Topic',
1749 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1750 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1751 'artist': 'The Cinematic Orchestra',
1752 'track': 'Burn Out',
1753 'album': 'Every Day',
1754 'release_data': None,
1755 'release_year': None,
1756 },
1757 'params': {
1758 'skip_download': True,
1759 },
545cc85d 1760 },
bc2ca1bb 1761 {
1762 # controversial video, only works with bpctr when authenticated with cookies
1763 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1764 'only_matching': True,
1765 },
a1a7907b 1766 {
1767 # controversial video, requires bpctr/contentCheckOk
1768 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1769 'info_dict': {
1770 'id': 'SZJvDhaSDnc',
1771 'ext': 'mp4',
1772 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1773 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1774 'uploader': 'CBS This Morning',
11f9be09 1775 'uploader_id': 'CBSThisMorning',
a1a7907b 1776 'upload_date': '20140716',
1777 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1778 }
1779 },
f7ad7160 1780 {
1781 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1782 'url': 'cBvYw8_A0vQ',
1783 'info_dict': {
1784 'id': 'cBvYw8_A0vQ',
1785 'ext': 'mp4',
1786 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1787 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1788 'upload_date': '20201120',
1789 'uploader': 'Walk around Japan',
1790 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1791 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1792 },
1793 'params': {
1794 'skip_download': True,
1795 },
0fb983f6 1796 }, {
1797 # Has multiple audio streams
1798 'url': 'WaOKSUlf4TM',
1799 'only_matching': True
9297939e 1800 }, {
1801 # Requires Premium: has format 141 when requested using YTM url
1802 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1803 'only_matching': True
1804 }, {
120916da 1805 # multiple subtitles with same lang_code
1806 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1807 'only_matching': True,
109dd3b2 1808 }, {
1809 # Force use android client fallback
1810 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1811 'info_dict': {
1812 'id': 'YOelRv7fMxY',
11f9be09 1813 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1814 'ext': '3gp',
1815 'upload_date': '20210624',
1816 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1817 'uploader': 'colinfurze',
11f9be09 1818 'uploader_id': 'colinfurze',
109dd3b2 1819 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1820 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1821 },
1822 'params': {
1823 'format': '17', # 3gp format available on android
1824 'extractor_args': {'youtube': {'player_client': ['android']}},
1825 },
120916da 1826 },
109dd3b2 1827 {
1828 # Skip download of additional client configs (remix client config in this case)
1829 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1830 'only_matching': True,
1831 'params': {
1832 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1833 },
8fc54b12 1834 }, {
1835 # shorts
1836 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1837 'only_matching': True,
1838 },
2eb88d95
PH
1839 ]
1840
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL carrying a non-empty ``list`` query parameter is rejected here;
    any other URL is decided by the parent class' ``suitable``.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    playlist_id = parse_qs(url).get('list', [None])[0]
    if playlist_id:
        return False
    return super(YoutubeIE, cls).suitable(url)
1850
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create its empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, filled and read by _decrypt_signature
    self._player_cache = {}
    # Player JS source keyed by player id, filled by _load_player
    self._code_cache = {}
e0df6211 1855
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return the absolute URL of the player JS, or None if none is found.

    The 'PLAYER_JS_URL' entry of *ytcfg* is preferred; when it is missing
    and a *webpage* is given, the page is searched for a
    "PLAYER_JS_URL"/"jsUrl" value instead. Protocol-relative and relative
    URLs are made absolute against https://www.youtube.com.
    """
    url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if webpage and not url:
        url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not url:
        return None
    if url.startswith('//'):
        # protocol-relative URL
        return 'https:' + url
    if re.match(r'https?://', url):
        return url
    return compat_urlparse.urljoin('https://www.youtube.com', url)
1870
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the lengths of the dot-separated parts of a signature, joined by dots."""
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1874
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the 'id' group of the first _PLAYER_INFO_RE pattern found in *player_url*.

    Raises ExtractorError when none of the patterns matches.
    """
    candidates = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    # Patterns are tried lazily and in order; the first match wins
    id_m = next(filter(None, candidates), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c 1884
109dd3b2 1885 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1886 player_id = self._extract_player_info(player_url)
1887 if player_id not in self._code_cache:
1888 self._code_cache[player_id] = self._download_webpage(
1889 player_url, video_id, fatal=fatal,
1890 note='Downloading player ' + player_id,
1891 errnote='Download of %s failed' % player_url)
1892 return player_id in self._code_cache
1893
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms an encrypted signature string.

    A character-index list stored in the 'youtube-sigfuncs' cache is used
    when present. Otherwise the function is parsed from the player JS and
    its index list is stored in the cache for later runs.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cached_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        return lambda s: ''.join(s[i] for i in cached_spec)

    if not self._load_player(video_id, player_url):
        return None

    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Feed a string whose characters encode their own positions, so the
    # output records which input index ends up at each output position
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
83799698 1916
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function *func*.

    *func* is run on a probe string whose characters encode their own
    positions; the resulting index sequence is written out as a sum of
    ``s[...]`` index and slice expressions and shown with ``to_screen``.
    """
    def gen_sig_code(idxs):
        # Yields one 's[...]' expression per single index or per run of
        # consecutive indices (step +1 or -1) found in idxs
        def _genslice(start, end, step):
            # Build the slice text for the inclusive run start..end
            starts = '' if start == 0 else str(start)
            # end is inclusive, so the slice stop is one step further; a
            # negative stop is left out so the slice runs to the boundary
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # step is None while no run is in progress
        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # the current run continues
                    continue
                # the run ended at prev; emit it
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # a new run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending after the last pair
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Character k of the probe has code point k, so ord() of each output
    # character gives the input index it was taken from
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    # Lengths of the dot-separated parts of the example signature
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1955
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Find the signature function in the player JS and return a Python wrapper for it.

    The function name is taken from the 'sig' group of the first pattern
    that matches *jscode*; the function is then extracted with
    JSInterpreter. The returned callable takes the signature string.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    js_function = interpreter.extract_function(funcname)
    # The extracted function takes a list of arguments
    return lambda s: js_function([s])
1979
545cc85d 1980 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1981 """Turn the encrypted s field into a working signature"""
6b37f0be 1982
c8bf86d5 1983 if player_url is None:
69ea8ca4 1984 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1985
c8bf86d5 1986 try:
62af3a0e 1987 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1988 if player_id not in self._player_cache:
1989 func = self._extract_signature_function(
60064c53 1990 video_id, player_url, s
c8bf86d5
PH
1991 )
1992 self._player_cache[player_id] = func
1993 func = self._player_cache[player_id]
a06916d9 1994 if self.get_param('youtube_print_sig_code'):
60064c53 1995 self._print_sig_code(func, s)
c8bf86d5
PH
1996 return func(s)
1997 except Exception as e:
1998 tb = traceback.format_exc()
1999 raise ExtractorError(
78caa52a 2000 'Signature extraction failed: ' + tb, cause=e)
e0df6211 2001
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' entry of ytcfg is used when it is set; otherwise the value
    is searched for in the player JS. Without a player_url this raises
    ExtractorError if fatal, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return
    if not self._load_player(video_id, player_url, fatal=fatal):
        return sts

    code = self._code_cache[self._extract_player_info(player_url)]
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2026
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL found in *player_responses*.

    Warns and returns when no such URL is present. The request is
    non-fatal.
    """
    playback_url = traverse_obj(
        player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]
    cpn = ''.join(cpn_chars)

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2052
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embeds found in *webpage*, in order of discovery.

    Three sources are scanned: embedded-player URLs (iframe/embed/object
    tags, data-video-url attributes, embedSWF and SWFObject calls), lazyYT
    ``data-youtube-id`` values, and ``data-video_id`` values of the
    WordPress "YouTube Video Importer" plugin. The last two contribute the
    raw attribute values, not full URLs.
    """
    # Embedded YouTube player
    # The embedSWF alternative matches the opening parenthesis of the call.
    # The previous form `embedSWF\(?:\s*` required a literal ':' and so
    # never matched an actual embedSWF("...") call.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the video id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
2084
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by _extract_urls for *webpage*, or None."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)
2089
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the 'id' group of *url* matched against _VALID_URL.

    Raises ExtractorError when the URL does not match.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af 2096
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in *data*.

    Start times come from 'timeRangeStartMillis' (converted to seconds)
    and titles from the 'simpleText' of each chapterRenderer.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_seconds(chapter):
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapter_list = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)
2111
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists of the engagement panels.

    Returns the chapters of the first list that produces any, or an empty
    list when none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2127
2128 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2129 chapters = []
7c365c21 2130 last_chapter = {'start_time': 0}
2131 for idx, chapter in enumerate(chapter_list or []):
2132 title = chapter_title(chapter)
84213ea8
S
2133 start_time = chapter_time(chapter)
2134 if start_time is None:
2135 continue
7c365c21 2136 last_chapter['end_time'] = start_time
2137 if start_time < last_chapter['start_time']:
2138 if idx == 1:
2139 chapters.pop()
2140 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2141 else:
2142 self.report_warning(f'Invalid start time for chapter "{title}"')
2143 continue
2144 last_chapter = {'start_time': start_time, 'title': title}
2145 chapters.append(last_chapter)
2146 last_chapter['end_time'] = duration
84213ea8
S
2147 return chapters
2148
545cc85d 2149 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2150 return self._parse_json(self._search_regex(
2151 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2152 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 2153
d92f5d5a 2154 @staticmethod
2155 def parse_time_text(time_text):
2156 """
2157 Parse the comment time text
2158 time_text is in the format 'X units ago (edited)'
2159 """
2160 time_text_split = time_text.split(' ')
2161 if len(time_text_split) >= 3:
da503b7a 2162 try:
2163 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2164 except ValueError:
2165 return None
d92f5d5a 2166
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a commentRenderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'. *parent* is the id
    of the parent comment; 'root' is used when it is not given.
    'timestamp' is None when the published time text cannot be parsed.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    time_text_dt = self.parse_time_text(time_text)
    # Must be bound even when the time text is missing or unparsable,
    # otherwise building the result below raises UnboundLocalError
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    # The last entry of the thumbnail list is used
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2204
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """Generate the comments of a comment section or of one reply thread.

    Yields comment dicts as produced by _extract_comment. For a top-level
    call (parent is None) the estimated total comment count (an int) is
    also yielded when the section header provides one.

    parent is the id of the comment whose replies are being fetched.
    comment_counts is the shared progress list
    [comments so far, estimated total, current thread number]; it is
    created here when not passed in and handed on to reply-thread calls.
    """

    def extract_header(contents):
        # Reads the expected comment count and picks the continuation of
        # the configured sort mode from the comments header
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        # Yields each comment in contents, followed by its replies
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type. It used to sit inside the getter
            # tuple, where it acted as a fallback getter and returned a
            # copy of the source dict instead of type-checking the result.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                yield from self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    visitor_data = None
    # Only a top-level call starts with the section header
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items)):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2375
@staticmethod
def _generate_comment_continuation(video_id):
    """
    Generates initial comment section continuation token from given video id

    The token is the base64 encoding of a fixed binary template into which
    the UTF-8 encoded video id is inserted at two positions.

    @param video_id  Video id (str)
    @returns         Continuation token (str)
    """
    vid_bytes = video_id.encode('utf-8')
    # Constant segments of the template, stored base64-encoded
    prefix, infix, suffix = (
        base64.b64decode(segment)
        for segment in ('Eg0SCw==', 'GAYyJyIRIgs=', 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u'))
    raw_token = b''.join((prefix, vid_bytes, infix, vid_bytes, suffix))
    return base64.b64encode(raw_token).decode('utf-8')
2386
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Collects the comments produced for the comment section renderers found in
    `contents`, stopping once the `max_comments` extractor argument is reached
    or the user interrupts with Ctrl-C.

    @returns  dict with the list of `comments` and its length as `comment_count`
    """
    comment_section_keys = ('itemSectionRenderer',)

    def iter_comment_items(sections):
        # Delegates to self._comment_entries for the first recognised renderer of each section
        if not isinstance(sections, list):
            return
        for section in sections:
            for renderer_name, renderer in section.items():
                if renderer_name in comment_section_keys:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    break

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    # Force English regardless of account setting to prevent parsing issues
    # See: https://github.com/yt-dlp/yt-dlp/issues/532
    # The copy keeps the caller's ytcfg untouched; the generator above picks up the copy
    ytcfg = copy.deepcopy(ytcfg)
    client_context = traverse_obj(
        ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})
    client_context['hl'] = 'en'

    collected, estimated_total = [], 0
    try:
        for item in iter_comment_items(contents):
            if len(collected) >= limit:
                break
            if isinstance(item, int):
                # An int is the estimated total number of comments, not a comment
                estimated_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        self.to_screen('Interrupted by user')

    self.to_screen('Downloaded %d/%d comments' % (len(collected), estimated_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
2424
@staticmethod
def _generate_player_context(sts=None):
    """
    Build the playback related part of a player API query

    @param sts  Signature timestamp; only included when it is not None
    @returns    dict to be merged into the player request query
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    query = {'playbackContext': {'contentPlaybackContext': playback}}
    query['contentCheckOk'] = True
    query['racyCheckOk'] = True
    return query
2439
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """
    Check whether a player response describes an age-gated video

    @param player_response  Player response dict; None and malformed values are tolerated
    @returns                True if the playability status carries an age-gate marker
    """
    playability = player_response.get('playabilityStatus') if isinstance(player_response, dict) else None
    if not isinstance(playability, dict):
        return False
    if playability.get('desktopLegacyAgeGateReason'):
        return True

    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Status values are upper-case (e.g. AGE_VERIFICATION_REQUIRED) and reasons are
    # free text, so the lower-case markers must be matched case-insensitively
    texts = [
        value.lower() for value in (playability.get('status'), playability.get('reason'))
        if isinstance(value, str)]
    return any(marker in text for marker in AGE_GATE_REASONS for text in texts)
2451
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the player response is UNPLAYABLE"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 2455
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
    """
    Request the player API JSON of a video on behalf of the given client

    The request is made with fatal=True; callers in this file handle ExtractorError.

    @returns  The API response, or None if the response is falsy
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    account_syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    # Non-fatal: without a signature timestamp the query is simply sent without one
    signature_timestamp = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    api_headers = self.generate_api_headers(
        player_ytcfg, identity_token, account_syncid,
        default_client=client, session_index=session_index)

    player_query = {'videoId': video_id, **self._generate_player_context(signature_timestamp)}
    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=player_query,
        ytcfg=player_ytcfg, headers=api_headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None
2473
def _get_requested_clients(self, url, smuggled_data):
    """
    Determine which innertube clients to request player responses from

    Clients come from the `player_client` extractor argument. `all` expands to
    every client whose name does not start with an underscore, highest priority
    first; unknown names are skipped with a warning. Without any valid client,
    `android` and `web` are used. For music URLs the `_music` variant of every
    selected client is added if such a client exists.

    @returns  List of client names without duplicates, in order of first appearance
    """
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if not client.startswith('_')),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = ['android', 'web']

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Collect the additions first so that the list is not extended while
        # it is still being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 2494
c0bc527b
M
def _extract_player_ytcfg(self, client, video_id):
    """
    Download the page that carries the ytcfg of the given client

    Only `web_music` and `web_embedded` have such a page; for every other
    client an empty dict is returned without any request.

    @returns  The extracted ytcfg, or {} if there is none
    """
    if client == 'web_music':
        config_url = 'https://music.youtube.com'
    elif client == 'web_embedded':
        config_url = f'https://www.youtube.com/embed/{video_id}?html5=1'
    else:
        return {}

    # Non-fatal download: a failure only results in an empty config
    config_page = self._download_webpage(
        config_url, video_id, fatal=False, note=f'Downloading {client} config')
    ytcfg = self.extract_ytcfg(video_id, config_page)
    return ytcfg or {}
2504
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
    """
    Generator yielding the player responses of a video, one per client

    The initial player response embedded in the webpage (if any) is yielded
    first with its streaming data removed. Afterwards every client in `clients`
    is queried in order; age-gate/creator variants of a client are queued on the
    fly when a response looks age-gated.

    An ExtractorError raised for a client does not stop the iteration. Earlier
    errors are reported as warnings; the last one is re-raised at the end if no
    response was yielded at all, and reported as a warning otherwise.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    # `clients` becomes a reversed working copy used as a stack: pop() returns
    # the clients in their original order and a client appended below is
    # processed right after the current one
    original_clients = clients
    clients = clients[::-1]

    def append_client(client_name):
        # Queue only known clients that were not requested from the start
        if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
            clients.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    yielded_pr = False
    if initial_pr:
        # Shallow copy so that initial_pr itself keeps its streamingData
        pr = dict(initial_pr)
        pr['streamingData'] = None
        yielded_pr = True
        yield pr

    last_error = None
    while clients:
        client = clients.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        try:
            # The web client reuses the initial player response when available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; the previous one is downgraded to a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            yielded_pr = True
            yield pr

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header():
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if last_error:
        if not yielded_pr:
            raise last_error
        self.report_warning(last_error)
11f9be09 2560
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """
    Generator yielding the format dicts of a video

    The formats listed directly in `streaming_data` ("formats" and
    "adaptiveFormats") are yielded first, followed by those of the HLS and
    DASH manifests referenced by each streaming data entry. A format whose
    itag was already seen is not yielded again.

    @param streaming_data  List of streamingData dicts
    @param player_url      URL of the player JS; formats with an encrypted
                           signature are skipped when it is not available
    @param is_live         DASH manifests of live videos are only processed if
                           the `include_live_dash` extractor argument is given
    """
    itags, stream_ids = [], []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag may be offered once per audio track
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an audioQuality that is present but None
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            # Remembered so that manifest formats can be ranked by guess_quality below
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be assembled from the signature cipher
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            # quality may still be None here (no quality, audioQuality or
            # qualityLabel given), hence the `or ''`
            'format_note': ', '.join(filter(None, (
                audio_track.get('displayName'),
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', '')))),
            'fps': int_or_none(fmt.get('fps')),
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is that of the one stream
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = (
        (not is_live or self._configuration_arg('include_live_dash'))
        and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    def guess_quality(f):
        # Rank a manifest format like the direct format with the same itag, or else the same height
        for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
            if val in qdict:
                return q(qdict[val])
        return -1

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag in itags:
                    continue
                if itag:
                    f['format_id'] = itag
                    itags.append(itag)
                f['quality'] = guess_quality(f)
                yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag:
                    itags.append(itag)
                f['quality'] = guess_quality(f)
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                yield f
2697
2698 def _real_extract(self, url):
2699 url, smuggled_data = unsmuggle_url(url, {})
2700 video_id = self._match_id(url)
2701
2702 base_url = self.http_scheme() + '//www.youtube.com/'
2703 webpage_url = base_url + 'watch?v=' + video_id
2704 webpage = self._download_webpage(
2705 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2706
2707 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2708 player_url = self._extract_player_url(master_ytcfg, webpage)
2709 identity_token = self._extract_identity_token(webpage, video_id)
2710
2711 player_responses = list(self._extract_player_responses(
2712 self._get_requested_clients(url, smuggled_data),
2713 video_id, webpage, master_ytcfg, player_url, identity_token))
2714
352d63fd 2715 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
11f9be09 2716
2717 playability_statuses = traverse_obj(
2718 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2719
2720 trailer_video_id = get_first(
2721 playability_statuses,
2722 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2723 expected_type=str)
2724 if trailer_video_id:
2725 return self.url_result(
2726 trailer_video_id, self.ie_key(), trailer_video_id)
2727
2728 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2729 if webpage else (lambda x: None))
2730
2731 video_details = traverse_obj(
2732 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2733 microformats = traverse_obj(
2734 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2735 expected_type=dict, default=[])
2736 video_title = (
2737 get_first(video_details, 'title')
2738 or self._get_text(microformats, (..., 'title'))
2739 or search_meta(['og:title', 'twitter:title', 'title']))
2740 video_description = get_first(video_details, 'shortDescription')
2741
2742 if not smuggled_data.get('force_singlefeed', False):
2743 if not self.get_param('noplaylist'):
2744 multifeed_metadata_list = get_first(
2745 player_responses,
2746 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2747 expected_type=str)
2748 if multifeed_metadata_list:
2749 entries = []
2750 feed_ids = []
2751 for feed in multifeed_metadata_list.split(','):
2752 # Unquote should take place before split on comma (,) since textual
2753 # fields may contain comma as well (see
2754 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2755 feed_data = compat_parse_qs(
2756 compat_urllib_parse_unquote_plus(feed))
2757
2758 def feed_entry(name):
2759 return try_get(
2760 feed_data, lambda x: x[name][0], compat_str)
2761
2762 feed_id = feed_entry('id')
2763 if not feed_id:
2764 continue
2765 feed_title = feed_entry('title')
2766 title = video_title
2767 if feed_title:
2768 title += ' (%s)' % feed_title
2769 entries.append({
2770 '_type': 'url_transparent',
2771 'ie_key': 'Youtube',
2772 'url': smuggle_url(
2773 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2774 {'force_singlefeed': True}),
2775 'title': title,
2776 })
2777 feed_ids.append(feed_id)
2778 self.to_screen(
2779 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2780 % (', '.join(feed_ids), video_id))
2781 return self.playlist_result(
2782 entries, video_id, video_title, video_description)
2783 else:
2784 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2785
7ea65411 2786 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
11f9be09 2787 is_live = get_first(video_details, 'isLive')
7ea65411 2788 if is_live is None:
2789 is_live = get_first(live_broadcast_details, 'isLiveNow')
11f9be09 2790
2791 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2792 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
bf1317d2 2793
545cc85d 2794 if not formats:
11f9be09 2795 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
b7da73eb 2796 self.raise_no_formats(
545cc85d 2797 'This video is DRM protected.', expected=True)
11f9be09 2798 pemr = get_first(
2799 playability_statuses,
2800 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2801 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2802 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 2803 if subreason:
545cc85d 2804 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 2805 countries = get_first(microformats, 'availableCountries')
545cc85d 2806 if not countries:
2807 regions_allowed = search_meta('regionsAllowed')
2808 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2809 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 2810 reason += f'. {subreason}'
545cc85d 2811 if reason:
b7da73eb 2812 self.raise_no_formats(reason, expected=True)
bf1317d2 2813
11f9be09 2814 for f in formats:
2a9c6dcd 2815 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
11f9be09 2816 f['source_preference'] = -10
3619f78d 2817 # TODO: this method is not reliable
2818 f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
11f9be09 2819
2a9c6dcd 2820 # Source is given priority since formats that throttle are given lower source_preference
2821 # When throttling issue is fully fixed, remove this
2822 self._sort_formats(formats, ('quality', 'height', 'fps', 'source'))
bf1317d2 2823
11f9be09 2824 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 2825 if not keywords and webpage:
2826 keywords = [
2827 unescapeHTML(m.group('content'))
2828 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2829 for keyword in keywords:
2830 if keyword.startswith('yt:stretch='):
201c1459 2831 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2832 if mobj:
2833 # NB: float is intentional for forcing float division
2834 w, h = (float(v) for v in mobj.groups())
2835 if w > 0 and h > 0:
2836 ratio = w / h
2837 for f in formats:
2838 if f.get('vcodec') != 'none':
2839 f['stretched_ratio'] = ratio
2840 break
6449cd80 2841
545cc85d 2842 thumbnails = []
11f9be09 2843 thumbnail_dicts = traverse_obj(
2844 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2845 expected_type=dict, default=[])
2846 for thumbnail in thumbnail_dicts:
2847 thumbnail_url = thumbnail.get('url')
2848 if not thumbnail_url:
2849 continue
2850 # Sometimes youtube gives a wrong thumbnail URL. See:
2851 # https://github.com/yt-dlp/yt-dlp/issues/233
2852 # https://github.com/ytdl-org/youtube-dl/issues/28023
2853 if 'maxresdefault' in thumbnail_url:
2854 thumbnail_url = thumbnail_url.split('?')[0]
2855 thumbnails.append({
2856 'url': thumbnail_url,
2857 'height': int_or_none(thumbnail.get('height')),
2858 'width': int_or_none(thumbnail.get('width')),
2859 })
ff2751ac 2860 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2861 if thumbnail_url:
2862 thumbnails.append({
2863 'url': thumbnail_url,
ff2751ac 2864 })
0ba692ac 2865 # The best resolution thumbnails sometimes does not appear in the webpage
2866 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 2867 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2868 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
245524e6 2869 # TODO: Test them also? - For some videos, even these don't exist
cca80fe6 2870 guaranteed_thumbnail_names = [
2871 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2872 'mqdefault', 'mq1', 'mq2', 'mq3',
2873 'default', '1', '2', '3'
2874 ]
2875 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2876 n_thumbnail_names = len(thumbnail_names)
2877
0ba692ac 2878 thumbnails.extend({
2879 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2880 video_id=video_id, name=name, ext=ext,
2881 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 2882 '_test_url': name in hq_thumbnail_names,
2883 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 2884 for thumb in thumbnails:
cca80fe6 2885 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 2886 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 2887 self._remove_duplicate_formats(thumbnails)
545cc85d 2888
7ea65411 2889 category = get_first(microformats, 'category') or search_meta('genre')
2890 channel_id = str_or_none(
2891 get_first(video_details, 'channelId')
2892 or get_first(microformats, 'externalChannelId')
2893 or search_meta('channelId'))
2894 duration = int_or_none(
2895 get_first(video_details, 'lengthSeconds')
2896 or get_first(microformats, 'lengthSeconds')
2897 or parse_duration(search_meta('duration'))) or None
2898 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2899
2900 live_content = get_first(video_details, 'isLiveContent')
2901 is_upcoming = get_first(video_details, 'isUpcoming')
2902 if is_live is None:
2903 if is_upcoming or live_content is False:
2904 is_live = False
2905 if is_upcoming is None and (live_content or is_live):
2906 is_upcoming = False
2907 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2908 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2909 if not duration and live_endtime and live_starttime:
2910 duration = live_endtime - live_starttime
2911
545cc85d 2912 info = {
2913 'id': video_id,
2914 'title': self._live_title(video_title) if is_live else video_title,
2915 'formats': formats,
2916 'thumbnails': thumbnails,
2917 'description': video_description,
2918 'upload_date': unified_strdate(
11f9be09 2919 get_first(microformats, 'uploadDate')
545cc85d 2920 or search_meta('uploadDate')),
11f9be09 2921 'uploader': get_first(video_details, 'author'),
545cc85d 2922 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2923 'uploader_url': owner_profile_url,
2924 'channel_id': channel_id,
11f9be09 2925 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
545cc85d 2926 'duration': duration,
2927 'view_count': int_or_none(
11f9be09 2928 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 2929 or search_meta('interactionCount')),
11f9be09 2930 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 2931 'age_limit': 18 if (
11f9be09 2932 get_first(microformats, 'isFamilySafe') is False
545cc85d 2933 or search_meta('isFamilyFriendly') == 'false'
2934 or search_meta('og:restrictions:age') == '18+') else 0,
2935 'webpage_url': webpage_url,
2936 'categories': [category] if category else None,
2937 'tags': keywords,
11f9be09 2938 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 2939 'is_live': is_live,
2940 'was_live': (False if is_live or is_upcoming or live_content is False
2941 else None if is_live is None or is_upcoming is None
2942 else live_content),
2943 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2944 'release_timestamp': live_starttime,
545cc85d 2945 }
b477fc13 2946
3944e7af 2947 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2948 # Converted into dicts to remove duplicates
2949 captions = {
2950 sub.get('baseUrl'): sub
2951 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2952 translation_languages = {
2953 lang.get('languageCode'): lang.get('languageName')
2954 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
545cc85d 2955 subtitles = {}
2956 if pctr:
774d79cc 2957 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2958 lang_subs = container.setdefault(lang_code, [])
545cc85d 2959 for fmt in self._SUBTITLE_FORMATS:
2960 query.update({
2961 'fmt': fmt,
2962 })
2963 lang_subs.append({
2964 'ext': fmt,
2965 'url': update_url_query(base_url, query),
774d79cc 2966 'name': sub_name,
545cc85d 2967 })
7e72694b 2968
3944e7af 2969 for base_url, caption_track in captions.items():
545cc85d 2970 if not base_url:
2971 continue
2972 if caption_track.get('kind') != 'asr':
120916da 2973 lang_code = (
2974 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2975 or caption_track.get('languageCode'))
545cc85d 2976 if not lang_code:
2977 continue
2978 process_language(
774d79cc 2979 subtitles, base_url, lang_code,
3944e7af 2980 traverse_obj(caption_track, ('name', 'simpleText')),
774d79cc 2981 {})
545cc85d 2982 continue
2983 automatic_captions = {}
3944e7af 2984 for trans_code, trans_name in translation_languages.items():
2985 if not trans_code:
545cc85d 2986 continue
2987 process_language(
3944e7af 2988 automatic_captions, base_url, trans_code,
2989 self._get_text(trans_name, max_runs=1),
2990 {'tlang': trans_code})
545cc85d 2991 info['automatic_captions'] = automatic_captions
2992 info['subtitles'] = subtitles
7e72694b 2993
545cc85d 2994 parsed_url = compat_urllib_parse_urlparse(url)
2995 for component in [parsed_url.fragment, parsed_url.query]:
2996 query = compat_parse_qs(component)
2997 for k, v in query.items():
2998 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2999 d_k += '_time'
3000 if d_k not in info and k in s_ks:
3001 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
3002
3003 # Youtube Music Auto-generated description
822b9d9c 3004 if video_description:
38d70284 3005 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 3006 if mobj:
822b9d9c
RA
3007 release_year = mobj.group('release_year')
3008 release_date = mobj.group('release_date')
3009 if release_date:
3010 release_date = release_date.replace('-', '')
3011 if not release_year:
545cc85d 3012 release_year = release_date[:4]
3013 info.update({
3014 'album': mobj.group('album'.strip()),
3015 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3016 'track': mobj.group('track').strip(),
3017 'release_date': release_date,
cc2db878 3018 'release_year': int_or_none(release_year),
545cc85d 3019 })
7e72694b 3020
545cc85d 3021 initial_data = None
3022 if webpage:
3023 initial_data = self._extract_yt_initial_variable(
3024 webpage, self._YT_INITIAL_DATA_RE, video_id,
3025 'yt initial data')
3026 if not initial_data:
11f9be09 3027 headers = self.generate_api_headers(
3028 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
3029 session_index=self._extract_session_index(master_ytcfg))
3030
109dd3b2 3031 initial_data = self._extract_response(
3032 item_id=video_id, ep='next', fatal=False,
11f9be09 3033 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
109dd3b2 3034 note='Downloading initial data API JSON')
545cc85d 3035
c60ee3a2 3036 try:
3037 # This will error if there is no livechat
3038 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3039 info['subtitles']['live_chat'] = [{
3040 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3041 'video_id': video_id,
3042 'ext': 'json',
f6745c49 3043 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 3044 }]
3045 except (KeyError, IndexError, TypeError):
3046 pass
545cc85d 3047
3048 if initial_data:
7c365c21 3049 info['chapters'] = (
3050 self._extract_chapters_from_json(initial_data, duration)
3051 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3052 or None)
545cc85d 3053
3054 contents = try_get(
3055 initial_data,
3056 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3057 list) or []
3058 for content in contents:
3059 vpir = content.get('videoPrimaryInfoRenderer')
3060 if vpir:
3061 stl = vpir.get('superTitleLink')
3062 if stl:
fe93e2c4 3063 stl = self._get_text(stl)
545cc85d 3064 if try_get(
3065 vpir,
3066 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3067 info['location'] = stl
3068 else:
3069 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3070 if mobj:
3071 info.update({
3072 'series': mobj.group(1),
3073 'season_number': int(mobj.group(2)),
3074 'episode_number': int(mobj.group(3)),
3075 })
3076 for tlb in (try_get(
3077 vpir,
3078 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3079 list) or []):
3080 tbr = tlb.get('toggleButtonRenderer') or {}
3081 for getter, regex in [(
3082 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3083 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3084 lambda x: x['accessibility'],
3085 lambda x: x['accessibilityData']['accessibilityData'],
3086 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3087 label = (try_get(tbr, getter, dict) or {}).get('label')
3088 if label:
3089 mobj = re.match(regex, label)
3090 if mobj:
3091 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3092 break
3093 sbr_tooltip = try_get(
3094 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3095 if sbr_tooltip:
3096 like_count, dislike_count = sbr_tooltip.split(' / ')
3097 info.update({
3098 'like_count': str_to_int(like_count),
3099 'dislike_count': str_to_int(dislike_count),
3100 })
3101 vsir = content.get('videoSecondaryInfoRenderer')
3102 if vsir:
052e1350 3103 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
545cc85d 3104 rows = try_get(
3105 vsir,
3106 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3107 list) or []
3108 multiple_songs = False
3109 for row in rows:
3110 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3111 multiple_songs = True
3112 break
3113 for row in rows:
3114 mrr = row.get('metadataRowRenderer') or {}
3115 mrr_title = mrr.get('title')
3116 if not mrr_title:
3117 continue
052e1350 3118 mrr_title = self._get_text(mrr, 'title')
3119 mrr_contents_text = self._get_text(mrr, ('contents', 0))
545cc85d 3120 if mrr_title == 'License':
3121 info['license'] = mrr_contents_text
3122 elif not multiple_songs:
3123 if mrr_title == 'Album':
3124 info['album'] = mrr_contents_text
3125 elif mrr_title == 'Artist':
3126 info['artist'] = mrr_contents_text
3127 elif mrr_title == 'Song':
3128 info['track'] = mrr_contents_text
3129
3130 fallbacks = {
3131 'channel': 'uploader',
3132 'channel_id': 'uploader_id',
3133 'channel_url': 'uploader_url',
3134 }
3135 for to, frm in fallbacks.items():
3136 if not info.get(to):
3137 info[to] = info.get(frm)
3138
3139 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3140 v = info.get(s_k)
3141 if v:
3142 info[d_k] = v
b84071c0 3143
11f9be09 3144 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3145 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3146 is_membersonly = None
b28f8d24 3147 is_premium = None
c224251a
M
3148 if initial_data and is_private is not None:
3149 is_membersonly = False
b28f8d24 3150 is_premium = False
47193e02 3151 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3152 badge_labels = set()
3153 for content in contents:
3154 if not isinstance(content, dict):
3155 continue
3156 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3157 for badge_label in badge_labels:
3158 if badge_label.lower() == 'members only':
3159 is_membersonly = True
3160 elif badge_label.lower() == 'premium':
3161 is_premium = True
3162 elif badge_label.lower() == 'unlisted':
3163 is_unlisted = True
c224251a 3164
c224251a
M
3165 info['availability'] = self._availability(
3166 is_private=is_private,
b28f8d24 3167 needs_premium=is_premium,
c224251a
M
3168 needs_subscription=is_membersonly,
3169 needs_auth=info['age_limit'] >= 18,
3170 is_unlisted=None if is_private is None else is_unlisted)
3171
06167fbb 3172 # get xsrf for annotations or comments
a06916d9 3173 get_annotations = self.get_param('writeannotations', False)
3174 get_comments = self.get_param('getcomments', False)
06167fbb 3175 if get_annotations or get_comments:
29f7c58a 3176 xsrf_token = None
11f9be09 3177 if master_ytcfg:
3178 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
29f7c58a 3179 if not xsrf_token:
3180 xsrf_token = self._search_regex(
3181 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 3182 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 3183
3184 # annotations
06167fbb 3185 if get_annotations:
11f9be09 3186 invideo_url = get_first(
3187 player_responses,
3188 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3189 expected_type=str)
64b6a4e9 3190 if xsrf_token and invideo_url:
29f7c58a 3191 xsrf_field_name = None
11f9be09 3192 if master_ytcfg:
3193 xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
29f7c58a 3194 if not xsrf_field_name:
3195 xsrf_field_name = self._search_regex(
3196 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 3197 webpage, 'xsrf field name',
29f7c58a 3198 group='xsrf_field_name', default='session_token')
8a784c74 3199 info['annotations'] = self._download_webpage(
64b6a4e9
RA
3200 self._proto_relative_url(invideo_url),
3201 video_id, note='Downloading annotations',
3202 errnote='Unable to download video annotations', fatal=False,
3203 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 3204
277d6ff5 3205 if get_comments:
11f9be09 3206 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3207
11f9be09 3208 self.mark_watched(video_id, player_responses)
d77ab8e2 3209
545cc85d 3210 return info
c5e8d7af 3211
5f6a1245 3212
8bdd16b4 3213class YoutubeTabIE(YoutubeBaseInfoExtractor):
3214 IE_DESC = 'YouTube.com tab'
70d5c17b 3215 _VALID_URL = r'''(?x)
3216 https?://
3217 (?:\w+\.)?
3218 (?:
3219 youtube(?:kids)?\.com|
3220 invidio\.us
3221 )/
3222 (?:
fe03a6cd 3223 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3224 (?P<not_channel>
9ba5705a 3225 feed/|hashtag/|
70d5c17b 3226 (?:playlist|watch)\?.*?\blist=
3227 )|
29f7c58a 3228 (?!(?:%s)\b) # Direct URLs
70d5c17b 3229 )
3230 (?P<id>[^/?\#&]+)
3231 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3232 IE_NAME = 'youtube:tab'
3233
81127aa5 3234 _TESTS = [{
da692b79 3235 'note': 'playlists, multipage',
8bdd16b4 3236 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3237 'playlist_mincount': 94,
3238 'info_dict': {
3239 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3240 'title': 'Игорь Клейнер - Playlists',
3241 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3242 'uploader': 'Игорь Клейнер',
3243 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3244 },
3245 }, {
da692b79 3246 'note': 'playlists, multipage, different order',
8bdd16b4 3247 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3248 'playlist_mincount': 94,
3249 'info_dict': {
3250 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3251 'title': 'Игорь Клейнер - Playlists',
3252 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3253 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3254 'uploader': 'Игорь Клейнер',
8bdd16b4 3255 },
201c1459 3256 }, {
da692b79 3257 'note': 'playlists, series',
201c1459 3258 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3259 'playlist_mincount': 5,
3260 'info_dict': {
3261 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3262 'title': '3Blue1Brown - Playlists',
3263 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3264 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3265 'uploader': '3Blue1Brown',
201c1459 3266 },
8bdd16b4 3267 }, {
da692b79 3268 'note': 'playlists, singlepage',
8bdd16b4 3269 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3270 'playlist_mincount': 4,
3271 'info_dict': {
3272 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3273 'title': 'ThirstForScience - Playlists',
3274 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3275 'uploader': 'ThirstForScience',
3276 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3277 }
3278 }, {
3279 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3280 'only_matching': True,
3281 }, {
da692b79 3282 'note': 'basic, single video playlist',
0e30a7b9 3283 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3284 'info_dict': {
0e30a7b9 3285 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3286 'uploader': 'Sergey M.',
3287 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3288 'title': 'youtube-dl public playlist',
81127aa5 3289 },
0e30a7b9 3290 'playlist_count': 1,
9291475f 3291 }, {
da692b79 3292 'note': 'empty playlist',
0e30a7b9 3293 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3294 'info_dict': {
0e30a7b9 3295 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3296 'uploader': 'Sergey M.',
3297 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3298 'title': 'youtube-dl empty playlist',
9291475f
PH
3299 },
3300 'playlist_count': 0,
3301 }, {
da692b79 3302 'note': 'Home tab',
8bdd16b4 3303 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3304 'info_dict': {
8bdd16b4 3305 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3306 'title': 'lex will - Home',
3307 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3308 'uploader': 'lex will',
3309 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3310 },
8bdd16b4 3311 'playlist_mincount': 2,
9291475f 3312 }, {
da692b79 3313 'note': 'Videos tab',
8bdd16b4 3314 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3315 'info_dict': {
8bdd16b4 3316 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3317 'title': 'lex will - Videos',
3318 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3319 'uploader': 'lex will',
3320 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3321 },
8bdd16b4 3322 'playlist_mincount': 975,
9291475f 3323 }, {
da692b79 3324 'note': 'Videos tab, sorted by popular',
8bdd16b4 3325 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3326 'info_dict': {
8bdd16b4 3327 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3328 'title': 'lex will - Videos',
3329 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3330 'uploader': 'lex will',
3331 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3332 },
8bdd16b4 3333 'playlist_mincount': 199,
9291475f 3334 }, {
da692b79 3335 'note': 'Playlists tab',
8bdd16b4 3336 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3337 'info_dict': {
8bdd16b4 3338 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3339 'title': 'lex will - Playlists',
3340 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3341 'uploader': 'lex will',
3342 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3343 },
8bdd16b4 3344 'playlist_mincount': 17,
ac7553d0 3345 }, {
da692b79 3346 'note': 'Community tab',
8bdd16b4 3347 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3348 'info_dict': {
8bdd16b4 3349 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3350 'title': 'lex will - Community',
3351 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3352 'uploader': 'lex will',
3353 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3354 },
3355 'playlist_mincount': 18,
87dadd45 3356 }, {
da692b79 3357 'note': 'Channels tab',
8bdd16b4 3358 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3359 'info_dict': {
8bdd16b4 3360 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3361 'title': 'lex will - Channels',
3362 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3363 'uploader': 'lex will',
3364 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3365 },
deaec5af 3366 'playlist_mincount': 12,
cd684175 3367 }, {
3368 'note': 'Search tab',
3369 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3370 'playlist_mincount': 40,
3371 'info_dict': {
3372 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3373 'title': '3Blue1Brown - Search - linear algebra',
3374 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3375 'uploader': '3Blue1Brown',
3376 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3377 },
6b08cdf6 3378 }, {
a0566bbf 3379 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3380 'only_matching': True,
3381 }, {
a0566bbf 3382 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3383 'only_matching': True,
3384 }, {
a0566bbf 3385 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3386 'only_matching': True,
3387 }, {
3388 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3389 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3390 'info_dict': {
3391 'title': '29C3: Not my department',
3392 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3393 'uploader': 'Christiaan008',
3394 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3395 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3396 },
3397 'playlist_count': 96,
3398 }, {
3399 'note': 'Large playlist',
3400 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3401 'info_dict': {
8bdd16b4 3402 'title': 'Uploads from Cauchemar',
3403 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3404 'uploader': 'Cauchemar',
3405 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3406 },
8bdd16b4 3407 'playlist_mincount': 1123,
3408 }, {
da692b79 3409 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3410 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3411 'only_matching': True,
4b7df0d3
JMF
3412 }, {
3413 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3414 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3415 'info_dict': {
acf757f4
PH
3416 'title': 'Uploads from Interstellar Movie',
3417 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3418 'uploader': 'Interstellar Movie',
8bdd16b4 3419 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3420 },
481cc733 3421 'playlist_mincount': 21,
358de58c 3422 }, {
3423 'note': 'Playlist with "show unavailable videos" button',
3424 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3425 'info_dict': {
3426 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3427 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3428 'uploader': 'Phim Siêu Nhân Nhật Bản',
3429 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3430 },
da692b79 3431 'playlist_mincount': 200,
5d342002 3432 }, {
da692b79 3433 'note': 'Playlist with unavailable videos in page 7',
5d342002 3434 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3435 'info_dict': {
3436 'title': 'Uploads from BlankTV',
3437 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3438 'uploader': 'BlankTV',
3439 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3440 },
da692b79 3441 'playlist_mincount': 1000,
8bdd16b4 3442 }, {
da692b79 3443 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3444 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3445 'info_dict': {
3446 'title': 'Data Analysis with Dr Mike Pound',
3447 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3448 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3449 'uploader': 'Computerphile',
deaec5af 3450 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3451 },
3452 'playlist_mincount': 11,
3453 }, {
a0566bbf 3454 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3455 'only_matching': True,
dacb3a86 3456 }, {
da692b79 3457 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3458 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3459 'info_dict': {
3460 'id': 'FqZTN594JQw',
3461 'ext': 'webm',
3462 'title': "Smiley's People 01 detective, Adventure Series, Action",
3463 'uploader': 'STREEM',
3464 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3465 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3466 'upload_date': '20150526',
3467 'license': 'Standard YouTube License',
3468 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3469 'categories': ['People & Blogs'],
3470 'tags': list,
dbdaaa23 3471 'view_count': int,
dacb3a86
S
3472 'like_count': int,
3473 'dislike_count': int,
3474 },
3475 'params': {
3476 'skip_download': True,
3477 },
13a75688 3478 'skip': 'This video is not available.',
dacb3a86 3479 'add_ie': [YoutubeIE.ie_key()],
481cc733 3480 }, {
8bdd16b4 3481 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3482 'only_matching': True,
66b48727 3483 }, {
8bdd16b4 3484 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3485 'only_matching': True,
a0566bbf 3486 }, {
3487 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3488 'info_dict': {
57015a4a 3489 'id': '3yImotZU3tw', # This will keep changing
a0566bbf 3490 'ext': 'mp4',
deaec5af 3491 'title': compat_str,
a0566bbf 3492 'uploader': 'Sky News',
3493 'uploader_id': 'skynews',
3494 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3495 'upload_date': r're:\d{8}',
3496 'description': compat_str,
a0566bbf 3497 'categories': ['News & Politics'],
3498 'tags': list,
3499 'like_count': int,
3500 'dislike_count': int,
3501 },
3502 'params': {
3503 'skip_download': True,
3504 },
da692b79 3505 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3506 }, {
3507 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3508 'info_dict': {
3509 'id': 'a48o2S1cPoo',
3510 'ext': 'mp4',
3511 'title': 'The Young Turks - Live Main Show',
3512 'uploader': 'The Young Turks',
3513 'uploader_id': 'TheYoungTurks',
3514 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3515 'upload_date': '20150715',
3516 'license': 'Standard YouTube License',
3517 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3518 'categories': ['News & Politics'],
3519 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3520 'like_count': int,
3521 'dislike_count': int,
3522 },
3523 'params': {
3524 'skip_download': True,
3525 },
3526 'only_matching': True,
3527 }, {
3528 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3529 'only_matching': True,
3530 }, {
3531 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3532 'only_matching': True,
09f1580e 3533 }, {
3534 'note': 'A channel that is not live. Should raise error',
3535 'url': 'https://www.youtube.com/user/numberphile/live',
3536 'only_matching': True,
3d3dddc9 3537 }, {
3538 'url': 'https://www.youtube.com/feed/trending',
3539 'only_matching': True,
3540 }, {
3d3dddc9 3541 'url': 'https://www.youtube.com/feed/library',
3542 'only_matching': True,
3543 }, {
3d3dddc9 3544 'url': 'https://www.youtube.com/feed/history',
3545 'only_matching': True,
3546 }, {
3d3dddc9 3547 'url': 'https://www.youtube.com/feed/subscriptions',
3548 'only_matching': True,
3549 }, {
3d3dddc9 3550 'url': 'https://www.youtube.com/feed/watch_later',
3551 'only_matching': True,
3552 }, {
da692b79 3553 'note': 'Recommended - redirects to home page',
3d3dddc9 3554 'url': 'https://www.youtube.com/feed/recommended',
3555 'only_matching': True,
29f7c58a 3556 }, {
da692b79 3557 'note': 'inline playlist with not always working continuations',
29f7c58a 3558 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3559 'only_matching': True,
3560 }, {
3561 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3562 'only_matching': True,
3563 }, {
3564 'url': 'https://www.youtube.com/course',
3565 'only_matching': True,
3566 }, {
3567 'url': 'https://www.youtube.com/zsecurity',
3568 'only_matching': True,
3569 }, {
3570 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3571 'only_matching': True,
3572 }, {
3573 'url': 'https://www.youtube.com/TheYoungTurks/live',
3574 'only_matching': True,
39ed931e 3575 }, {
3576 'url': 'https://www.youtube.com/hashtag/cctv9',
3577 'info_dict': {
3578 'id': 'cctv9',
3579 'title': '#cctv9',
3580 },
3581 'playlist_mincount': 350,
201c1459 3582 }, {
3583 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3584 'only_matching': True,
9297939e 3585 }, {
da692b79 3586 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3587 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3588 'only_matching': True
fe03a6cd 3589 }, {
3590 'note': '/browse/ should redirect to /channel/',
3591 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3592 'only_matching': True
3593 }, {
3594 'note': 'VLPL, should redirect to playlist?list=PL...',
3595 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3596 'info_dict': {
3597 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3598 'uploader': 'NoCopyrightSounds',
3599 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3600 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3601 'title': 'NCS Releases',
3602 },
3603 'playlist_mincount': 166,
18db7548 3604 }, {
3605 'note': 'Topic, should redirect to playlist?list=UU...',
3606 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3607 'info_dict': {
3608 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3609 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3610 'title': 'Uploads from Royalty Free Music - Topic',
3611 'uploader': 'Royalty Free Music - Topic',
3612 },
3613 'expected_warnings': [
3614 'A channel/user page was given',
3615 'The URL does not have a videos tab',
3616 ],
3617 'playlist_mincount': 101,
3618 }, {
3619 'note': 'Topic without a UU playlist',
3620 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3621 'info_dict': {
3622 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3623 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3624 },
3625 'expected_warnings': [
3626 'A channel/user page was given',
3627 'The URL does not have a videos tab',
3628 'Falling back to channel URL',
3629 ],
3630 'playlist_mincount': 9,
abcdd12b 3631 }, {
3632 'note': 'Youtube music Album',
3633 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3634 'info_dict': {
3635 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3636 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3637 },
3638 'playlist_count': 50,
47193e02 3639 }, {
3640 'note': 'unlisted single video playlist',
3641 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3642 'info_dict': {
3643 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3644 'uploader': 'colethedj',
3645 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3646 'title': 'yt-dlp unlisted playlist test',
3647 'availability': 'unlisted'
3648 },
3649 'playlist_count': 1,
29f7c58a 3650 }]
3651
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE accepts is rejected here; for every other URL
    the decision is delegated to the parent class implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3656
3657 def _extract_channel_id(self, webpage):
3658 channel_id = self._html_search_meta(
3659 'channelId', webpage, 'channel id', default=None)
3660 if channel_id:
3661 return channel_id
3662 channel_url = self._html_search_meta(
3663 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3664 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3665 'twitter:app:url:googleplay'), webpage, 'channel url')
3666 return self._search_regex(
3667 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3668 channel_url, 'channel id')
15f6397c 3669
8bdd16b4 3670 @staticmethod
cd7c66cf 3671 def _extract_basic_item_renderer(item):
3672 # Modified from _extract_grid_item_renderer
201c1459 3673 known_basic_renderers = (
3674 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3675 )
3676 for key, renderer in item.items():
201c1459 3677 if not isinstance(renderer, dict):
cd7c66cf 3678 continue
201c1459 3679 elif key in known_basic_renderers:
3680 return renderer
3681 elif key.startswith('grid') and key.endswith('Renderer'):
3682 return renderer
8bdd16b4 3683
8bdd16b4 3684 def _grid_entries(self, grid_renderer):
3685 for item in grid_renderer['items']:
3686 if not isinstance(item, dict):
39b62db1 3687 continue
cd7c66cf 3688 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3689 if not isinstance(renderer, dict):
3690 continue
052e1350 3691 title = self._get_text(renderer, 'title')
fe93e2c4 3692
8bdd16b4 3693 # playlist
3694 playlist_id = renderer.get('playlistId')
3695 if playlist_id:
3696 yield self.url_result(
3697 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3698 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3699 video_title=title)
201c1459 3700 continue
8bdd16b4 3701 # video
3702 video_id = renderer.get('videoId')
3703 if video_id:
3704 yield self._extract_video(renderer)
201c1459 3705 continue
8bdd16b4 3706 # channel
3707 channel_id = renderer.get('channelId')
3708 if channel_id:
8bdd16b4 3709 yield self.url_result(
3710 'https://www.youtube.com/channel/%s' % channel_id,
3711 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3712 continue
3713 # generic endpoint URL support
3714 ep_url = urljoin('https://www.youtube.com/', try_get(
3715 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3716 compat_str))
3717 if ep_url:
3718 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3719 if ie.suitable(ep_url):
3720 yield self.url_result(
3721 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3722 break
8bdd16b4 3723
3d3dddc9 3724 def _shelf_entries_from_content(self, shelf_renderer):
3725 content = shelf_renderer.get('content')
3726 if not isinstance(content, dict):
8bdd16b4 3727 return
cd7c66cf 3728 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3729 if renderer:
3730 # TODO: add support for nested playlists so each shelf is processed
3731 # as separate playlist
3732 # TODO: this includes only first N items
3733 for entry in self._grid_entries(renderer):
3734 yield entry
3735 renderer = content.get('horizontalListRenderer')
3736 if renderer:
3737 # TODO
3738 pass
8bdd16b4 3739
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries of a shelf.

    When the shelf endpoint carries a URL, a URL result for it is yielded
    first, unless *skip_channels* is set and the URL contains
    ``/channels?``, in which case nothing is yielded at all. The entries
    embedded in the shelf content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are skipped on request. Note that checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 3756
8bdd16b4 3757 def _playlist_entries(self, video_list_renderer):
3758 for content in video_list_renderer['contents']:
3759 if not isinstance(content, dict):
3760 continue
3761 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3762 if not isinstance(renderer, dict):
3763 continue
3764 video_id = renderer.get('videoId')
3765 if not video_id:
3766 continue
3767 yield self._extract_video(renderer)
07aeced6 3768
def _rich_entries(self, rich_grid_renderer):
    """Yield the video found at ``content.videoRenderer`` of the given renderer.

    Nothing is yielded when that renderer is missing or has no ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3776
8bdd16b4 3777 def _video_entry(self, video_renderer):
3778 video_id = video_renderer.get('videoId')
3779 if video_id:
3780 return self._extract_video(video_renderer)
dacb3a86 3781
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by one community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every video link in the post text that YoutubeIE accepts and that
    does not point at the attached video again.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video was already yielded above
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3817
8bdd16b4 3818 def _post_thread_continuation_entries(self, post_thread_continuation):
3819 contents = post_thread_continuation.get('contents')
3820 if not isinstance(contents, list):
3821 return
3822 for content in contents:
3823 renderer = content.get('backstagePostThreadRenderer')
3824 if not isinstance(renderer, dict):
3825 continue
3826 for entry in self._post_thread_entries(renderer):
3827 yield entry
07aeced6 3828
39ed931e 3829 r''' # unused
3830 def _rich_grid_entries(self, contents):
3831 for content in contents:
3832 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3833 if video_renderer:
3834 entry = self._video_entry(video_renderer)
3835 if entry:
3836 yield entry
3837 '''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of a tab, following continuations page by page.

    @param tab              the selected tab renderer; its 'content' is expected
                            to hold a 'sectionListRenderer' or 'richGridRenderer'
    @param item_id          id used to label the per-page download messages
    @param identity_token   forwarded to generate_api_headers
    @param account_syncid   forwarded to generate_api_headers
    @param ytcfg            forwarded to generate_api_headers and _extract_response

    The first page is read from the tab itself; further pages are requested
    through _extract_response until no continuation is found, the response is
    empty, or the response contains no renderer known to this method.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Side effect: stores the continuation it finds in continuation_list[0]
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # Not an item section: only rich items are handled here
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Maps a renderer key to the callable producing its entries
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            # Fall back to a continuation attached to the item section itself
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # Last resort: a continuation attached to the parent renderer
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # Python 2 does not support nonlocal, so a one-element list is shared with extract_entries
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    visitor_data = None

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response does not provide one
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response shape: entries under 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset the shared cell so that a stale continuation is not reused
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: entries under 'appendContinuationItemsAction'.
        # Each value is (entry extractor, key under which the extractor expects the items)
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item decides how the whole list is processed
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the item list in a dict shaped like the renderer the extractor expects
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognised in this response: stop paging
        break
9558dcec 3953
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab flagged as selected.

    Each tab is looked up under 'tabRenderer' or 'expandableTabRenderer'.
    Raises ExtractorError when no tab has 'selected' set to True.
    """
    for candidate in tabs:
        tab_renderer = dict_get(candidate, ('tabRenderer', 'expandableTabRenderer'))
        if tab_renderer and tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3962
@classmethod
def _extract_uploader(cls, data):
    """Build the uploader fields from the playlist sidebar.

    Reads the first title run of the video owner in the
    'playlistSidebarSecondaryInfoRenderer'. Returns a dict with any of
    'uploader', 'uploader_id' and 'uploader_url' whose value is not None;
    the dict is empty when no owner is present.
    """
    secondary_info = cls._extract_sidebar_info_renderer(
        data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner_run = try_get(
        secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner_run:
        return {}

    fields = (
        ('uploader', owner_run.get('text')),
        ('uploader_id', try_get(
            owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)),
        ('uploader_url', urljoin(
            'https://www.youtube.com/',
            try_get(owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))),
    )
    return {name: value for name, value in fields if value is not None}
8bdd16b4 3977
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a page that is organised in tabs.

    Metadata is taken from the channel metadata renderer when present and
    from the playlist metadata renderer otherwise; the entries come from the
    selected tab via _entries.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    playlist_id = title = description = None
    tags, raw_thumbnails = [], []

    meta_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta_renderer:
        channel_name = meta_renderer.get('title')
        channel_url = meta_renderer.get('channelUrl')
        channel_id = meta_renderer.get('externalId')
    else:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        playlist_id = channel_id
        tags = meta_renderer.get('keywords', '').split()
        raw_thumbnails = try_get(meta_renderer, lambda x: x['avatar']['thumbnails'], list)
        if not raw_thumbnails:
            # No avatar: fall back to the playlist thumbnail from the sidebar
            raw_thumbnails = try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list) or []

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    # Append the tab name (and expanded text, if any) to the title
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly just updated) uploader fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    ytcfg = self.extract_ytcfg(item_id, webpage)
    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    entries = self._entries(selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 4052
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages as needed.

    @param playlist     playlist renderer of the first page
    @param playlist_id  id of the mix; sent as 'playlistId' with each request
    @param data         initial data, used to extract the account sync id
    @param webpage      webpage, used to extract ytcfg and the identity token

    Iteration stops when a page has no videos, when a page adds nothing after
    the last video already yielded, when the first video shows up again, or
    when a response carries no playlist.
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        # The previous response may not contain a playlist at all
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last panel item is not guaranteed to be a video renderer;
        # fall back to an empty dict so that the defaults below are used
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 4088
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a playlist panel found on a watch page.

    If the playlist has its own URL and that URL differs from `url`, the
    extraction is delegated to YoutubeTabIE. Otherwise the entries are
    extracted as a mix playlist.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    is_mix = not playlist_url or playlist_url == url
    if is_mix:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4106
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    primary_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(primary_info)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        primary_info, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        if not try_get(dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            # The selected dropdown item is treated like a badge
            badge_labels.add(selected_label.lower())
            break

    is_private = is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'public':
            is_unlisted = is_private = False
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4138
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy `info_renderer` found among the playlist sidebar items.

    Only values of `expected_type` are considered. Returns None when the
    sidebar is missing or no item carries a matching renderer.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next((found for found in candidates if found), None)
4147
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when the page has no primary sidebar info renderer;
    otherwise returns the result of the (non-fatal) API request.
    """
    primary_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not primary_info:
        return

    # Look for the menu entry whose label is 'show unavailable videos'
    browse_endpoint = {}
    menu_items = try_get(
        primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        label = try_get(nav_item, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break
    browse_id = browse_endpoint.get('browseId')
    params = browse_endpoint.get('params')

    ytcfg = self.extract_ytcfg(item_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    # Default values are used when the button was not found or lacks a field
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4186
def _extract_webpage(self, url, item_id):
    """Download the webpage and its initial data, retrying on incomplete data.

    The page is accepted as soon as the initial data has 'contents' or
    'currentVideoEndpoint'. The number of retries is read from the
    'extractor_retries' param (default 3).

    Returns a (webpage, data) tuple.
    Raises ExtractorError when the data is still incomplete after the last retry.
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1  # incremented at the top of the loop, so the first attempt is 0
    last_error = 'Incomplete yt initial data recieved'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self.extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        # Out of retries: give up instead of looping again
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
4208
@staticmethod
def _smuggle_data(entries, data):
    """Yield each entry, smuggling `data` into its 'url' when `data` is truthy.

    Entries are modified in place before being yielded.
    """
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
4215
def _real_extract(self, url):
    """Extract `url`, carrying smuggled data over to the resulting entries.

    For music URLs the 'is_music_url' flag is added to the smuggled data
    before the actual extraction.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True

    info_dict = self.__real_extract(url, smuggled_data)
    entries = info_dict.get('entries')
    if entries:
        # Pass the smuggled data on to every entry URL
        info_dict['entries'] = self._smuggle_data(entries, smuggled_data)
    return info_dict
4224
# Splits a URL into the part matched by _VALID_URL ('pre'), a '/tab' component
# that is only attempted when the 'channel_type' group matched, and the rest ('post').
# NOTE(review): 'channel_type' and 'not_channel' are presumably groups of _VALID_URL - confirm
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)

def __real_extract(self, url, smuggled_data):
    """Extract a tab, playlist or video page after normalising the URL.

    @param url            URL to extract; its host is replaced by www.youtube.com
    @param smuggled_data  dict of smuggled data; only 'is_music_url' is read here

    Returns the result of _extract_from_tabs or _extract_from_playlist, or a
    url_result pointing to YoutubeIE when only a video could be identified.
    Raises ExtractorError when the page cannot be recognised or when a /live
    tab did not redirect to a video.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match groups as a dict, with unmatched groups replaced by ''
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly changed) parts and match it again
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            # The requested /videos tab was not the one that got selected
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Switch to the playlist only once it loaded without error alerts
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)
    # The data may have been replaced above, so the tabs are looked up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Neither tabs nor playlist: fall back to the single video, if any
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4341
c5e8d7af 4342
class YoutubePlaylistIE(InfoExtractor):
    """Matches playlist ids and URLs with a list= parameter and hands them to YoutubeTabIE."""

    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE handles or that carry a video id (v=)."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one; otherwise use the bare id
        query = parse_qs(url) or {'list': playlist_id}
        tab_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            tab_url = smuggle_url(tab_url, {'is_music_url': True})
        return self.url_result(tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4427
4428
class YoutubeYtBeIE(InfoExtractor):
    """Handles youtu.be short links that carry both a video id and a list= parameter."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4467
4468
class YoutubeYtUserIE(InfoExtractor):
    """Expands the "ytuser:<name>" shortcut into a /user/<name> URL handled by YoutubeTabIE."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Everything after "ytuser:" is used as the user name
        user_id = self._match_id(url)
        return self.url_result(
            'https://www.youtube.com/user/%s' % user_id,
            ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4482
b05654f0 4483
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the ":ytfav" keyword (and its spelling variants) to the "LL" playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    # Accepts :ytfav, :ytfavs, :ytfavorite(s) and :ytfavourite(s)
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The input is ignored; the fixed playlist URL is handled by YoutubeTabIE
        return self.url_result(
            'https://www.youtube.com/playlist?list=LL',
            ie=YoutubeTabIE.ie_key())
4501
4502
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor for the "ytsearch" keyword, paging through the 'search' endpoint."""

    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to `n` video results for `query`, following continuations."""
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        for page_num in itertools.count(1):
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page and continuation pages keep the sections in different places
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [section]})

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation:
                break
            # The continuation is merged into the request used for the next page
            request.update(continuation)

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
75dff0ee 4570
c9ae7b95 4571
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE for the "ytsearchdate" keyword."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Sent as 'params' with every search request by YoutubeSearchIE._entries
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4577
c9ae7b95 4578
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Handles youtube.com/results URLs by running the search they describe."""

    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return the URL pattern of this class unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query and the 'sp' filter from the URL and run the search."""
        url_params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = url_params.get('search_query') or url_params.get('q')
        query = search_terms[0]
        # The 'sp' value is stored on the instance, where _entries picks it up
        sp_values = url_params.get('sp', ('',))
        self._SEARCH_PARAMS = sp_values[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4605
4606
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # The extractor name is derived from the feed name of the subclass
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # The input is ignored; the /feed/<name> page is handled by YoutubeTabIE
        return self.url_result(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            ie=YoutubeTabIE.ie_key())
25f14e9f
S
4623
4624
class YoutubeWatchLaterIE(InfoExtractor):
    """Maps the ":ytwatchlater" keyword to the "WL" playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The input is ignored; the fixed playlist URL is handled by YoutubeTabIE
        return self.url_result(
            'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
3462ffa8 4637
4638
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "recommended" feed. Matches the bare youtube.com
    # front page as well as the ":ytrec" / ":ytrecommended" shortcuts.
    _FEED_NAME = 'recommended'
    # NOTE(review): IE_DESC below still says "requires authentication" while
    # login is explicitly not required here - confirm which one is intended
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]
1ed5b5c9 4654
1ed5b5c9 4655
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "subscriptions" feed. The pattern accepts
    # ":ytsub", ":ytsubs", ":ytsubscription" and ":ytsubscriptions".
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]
1ed5b5c9 4667
1ed5b5c9 4668
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "history" feed, reachable through the ":ythis"
    # or ":ythistory" shortcuts.
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]
1ed5b5c9
JMF
4677
4678
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that lost their video id (typically
    # because an unquoted "&" was interpreted by the shell) and turns them
    # into a helpful error instead of a confusing extraction failure.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches a /watch? URL that has at most one of the listed non-id
    # parameters, or an attribution_link with only its "a" parameter; the
    # trailing "$" ensures nothing (in particular no "v=") follows.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with a hint on how to pass the URL correctly.

        Raises:
            ExtractorError: unconditionally, flagged as expected since the
                cause is user input rather than an extractor bug.
        """
        # The suggested commands name the yt-dlp executable (this file is
        # part of yt-dlp, not youtube-dl). The space before the final period
        # keeps the period visually separate from the example video id.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4726
4727
class YoutubeTruncatedIDIE(InfoExtractor):
    # Recognises watch URLs whose "v" value is only 1-10 characters long and
    # reports them as truncated instead of attempting extraction.
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        # Pull the too-short id out of the URL only to echo it in the error
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)