]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
Don't try to merge with final extension
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
2d30521a 37 float_or_none,
11f9be09 38 format_field,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
641ad5d8 41 is_html,
94278f72 42 mimetype2ext,
9c0d7f49 43 network_exceptions,
11f9be09 44 orderedSet,
6310acf5 45 parse_codecs,
49bd8c66 46 parse_count,
7c80519c 47 parse_duration,
7ea65411 48 parse_iso8601,
dca3ff4a 49 qualities,
3995d37d 50 remove_start,
cf7e015f 51 smuggle_url,
dbdaaa23 52 str_or_none,
c93d53f5 53 str_to_int,
7c365c21 54 traverse_obj,
556dbe7f 55 try_get,
c5e8d7af
PH
56 unescapeHTML,
57 unified_strdate,
cf7e015f 58 unsmuggle_url,
8bdd16b4 59 update_url_query,
21c340b8 60 url_or_none,
6e6bc8da 61 urlencode_postdata,
fe93e2c4 62 urljoin,
7c365c21 63 variadic,
c5e8d7af
PH
64)
65
5f6a1245 66
def parse_qs(url):
    """Parse the query component of *url* with ``compat_urlparse.parse_qs``."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
69
70
# Per-client InnerTube configuration, keyed by client name.
# Entries may omit INNERTUBE_API_KEY / INNERTUBE_HOST; build_innertube_clients()
# fills in defaults for those, adds a 'priority' and the '*_agegate' variants.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
    },
    # No INNERTUBE_API_KEY here: build_innertube_clients() supplies the default
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26
    },
    # No INNERTUBE_API_KEY here: build_innertube_clients() supplies the default
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
209
210
def build_innertube_clients():
    """
    Complete the module-level INNERTUBE_CLIENTS table in place.

    For every client: fill in a default API key and host, default the
    context language to 'en' and assign a 'priority'.  Each base client
    additionally gets a deep-copied '<client>_agegate' variant.
    """
    base_clients = ('android', 'web', 'ios', 'mweb')
    # Ranking helper built from the reversed base client tuple
    # NOTE(review): presumably earlier entries of base_clients rank higher - confirm against `qualities`
    priority = qualities(base_clients[::-1])
    third_party = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }

    # Iterate over a snapshot since agegate variants are inserted during the loop
    for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
        ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        context = ytcfg['INNERTUBE_CONTEXT']
        context['client'].setdefault('hl', 'en')

        # Variants rank by the name before the first underscore
        family = client.split('_', 1)[0]
        ytcfg['priority'] = 10 * priority(family)

        if client in base_clients:
            agegate_ytcfg = copy.deepcopy(ytcfg)
            agegate_context = agegate_ytcfg['INNERTUBE_CONTEXT']
            agegate_context['client']['clientScreen'] = 'EMBED'
            agegate_context['thirdParty'] = third_party
            agegate_ytcfg['priority'] -= 1
            INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg
        elif client.endswith('_embedded'):
            context['thirdParty'] = third_party
            ytcfg['priority'] -= 2
        else:
            ytcfg['priority'] -= 3
234
235
236build_innertube_clients()
237
238
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # NOTE: The legacy username/password login flow (and the Google account
    # URLs it used) only survived as string literals placed after an
    # unconditional return, i.e. it could never run. It has been dropped;
    # consult the version control history if it ever needs to be revived.

    def _login(self):
        """
        Check the authentication options; no login request is performed.

        Username+password login is broken, so only cookies are usable.
        If _LOGIN_REQUIRED is set and neither the 'cookiefile' nor the
        'cookiesfrombrowser' param is given, the problem is reported through
        raise_login_required(). If a username is configured, a warning
        pointing to the cookies hint is emitted instead.

        Always returns None.
        """
        if (self._LOGIN_REQUIRED
                and self.get_param('cookiefile') is None
                and self.get_param('cookiesfrombrowser') is None):
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')
        username, _ = self._get_login_info()
        if username:
            self.report_warning(
                'Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])

    def _initialize_consent(self):
        """
        Make sure an accepting CONSENT cookie is set for .youtube.com.

        Nothing is done when a '__Secure-3PSID' cookie exists or the CONSENT
        cookie already contains 'YES'. Otherwise the id of a pending consent
        is reused when it can be extracted, else a random 3-digit id is used.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Set up the consent cookie and, if a downloader is attached, run the login checks."""
        self._initialize_consent()
        if self._downloader is not None:
            self._login()

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _get_default_ytcfg(self, client='web'):
        """Return a deep copy of the built-in configuration of *client*."""
        return copy.deepcopy(INNERTUBE_CLIENTS[client])

    def _get_innertube_host(self, client='web'):
        """Return the 'INNERTUBE_HOST' configured for *client*."""
        return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

    def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
        """
        try_get but with fallback to default ytcfg client values when present

        The default configuration of *default_client* is consulted whenever
        the lookup in *ytcfg* yields a falsy value.
        """
        _func = lambda y: try_get(y, getter, expected_type)
        return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

    def _extract_client_name(self, ytcfg, default_client='web'):
        """Return the client name from *ytcfg*, falling back to the *default_client* configuration."""
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

    @staticmethod
    def _extract_session_index(*data):
        """Return the first 'SESSION_INDEX' among *data* that int_or_none accepts, else None."""
        for ytcfg in data:
            session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
            if session_index is not None:
                return session_index

    def _extract_client_version(self, ytcfg, default_client='web'):
        """Return the client version from *ytcfg*, falling back to the *default_client* configuration."""
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)

    def _extract_api_key(self, ytcfg=None, default_client='web'):
        """Return 'INNERTUBE_API_KEY' from *ytcfg*, falling back to the *default_client* configuration."""
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)

    def _extract_context(self, ytcfg=None, default_client='web'):
        """
        Return the InnerTube context dict to send with API requests.

        The 'INNERTUBE_CONTEXT' of *ytcfg* is returned as-is when present.
        Otherwise a copy of the default context of *default_client* is used
        and, if *ytcfg* is non-empty, updated with the client name, client
        version and (when available) 'VISITOR_DATA' taken from it.
        """
        _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
        context = _get_context(ytcfg)
        if context:
            return context

        context = _get_context(self._get_default_ytcfg(default_client))
        if not ytcfg:
            return context

        # Recreate the client context (required)
        context['client'].update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    # Cached SAPISID cookie value: None = not looked up yet, False = unavailable
    _SAPISID = None

    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """
        Return the 'SAPISIDHASH ...' authorization value for *origin*.

        The SAPISID value is read from the cookies on first use and cached
        on the instance. Returns None when no such cookie is available.
        """
        time_now = round(time.time())
        if self._SAPISID is None:
            yt_cookies = self._get_cookies('https://www.youtube.com')
            # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
            # See: https://github.com/yt-dlp/yt-dlp/issues/393
            sapisid_cookie = dict_get(
                yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
            if sapisid_cookie and sapisid_cookie.value:
                self._SAPISID = sapisid_cookie.value
                self.write_debug('Extracted SAPISID cookie')
                # SAPISID cookie is required if not already present
                if not yt_cookies.get('SAPISID'):
                    self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                    self._set_cookie(
                        '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
            else:
                self._SAPISID = False
        if not self._SAPISID:
            return None
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None, api_hostname=None, default_client='web'):
        """
        POST *query* to the InnerTube endpoint *ep* and return the parsed JSON.

        @param context         InnerTube context; defaults to that of *default_client*
        @param api_key         API key; defaults to that of *default_client*
        @param api_hostname    Host to query; defaults to that of *default_client*
        @param headers         Extra headers, applied on top of the generated ones
        Remaining arguments are forwarded to _download_json.
        """
        data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
        data.update(query)
        real_headers = self.generate_api_headers(default_client=default_client)
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            # The fallback key must belong to the same client as the context,
            # headers and host above, not unconditionally to 'web'
            query={'key': api_key or self._extract_api_key(default_client=default_client)})

    def extract_yt_initial_data(self, video_id, webpage):
        """Find the ytInitialData JSON in *webpage* and return it parsed."""
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """
        Return the identity token of *webpage*, or None.

        'ID_TOKEN' of the page's ytcfg is preferred; otherwise the raw page
        is searched with a regex.
        """
        if not webpage:
            return None
        ytcfg = self.extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(*args):
        """
        Extract syncId required to download private playlists of secondary channels
        @params response and/or ytcfg
        """
        for data in args:
            # ytcfg includes channel_syncid if on secondary channel
            delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
            if delegated_sid:
                return delegated_sid
            sync_ids = (try_get(
                data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                       lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
            if len(sync_ids) >= 2 and sync_ids[1]:
                # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
                # and just "user_syncid||" for primary channel. We only want the channel_syncid
                return sync_ids[0]

    def extract_ytcfg(self, video_id, webpage):
        """Return the dict passed to ``ytcfg.set(...)`` in *webpage*, or {} if it cannot be obtained."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def generate_api_headers(
            self, ytcfg=None, identity_token=None, account_syncid=None,
            visitor_data=None, api_hostname=None, default_client='web', session_index=None):
        """
        Build the HTTP headers for an InnerTube API request.

        Client name, client version and origin are always set. Identity
        token, page id, auth user, visitor id and the SAPISIDHASH
        authorization are added only when the respective value is given or
        can be derived from *ytcfg* / the cookies.
        """
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
            'Origin': origin
        }
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
        if session_index is None and ytcfg:
            session_index = self._extract_session_index(ytcfg)
        if account_syncid or session_index is not None:
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers

    @staticmethod
    def _build_api_continuation_query(continuation, ctp=None):
        """Return the API query for *continuation*, with click tracking params if *ctp* is given."""
        query = {
            'continuation': continuation
        }
        # TODO: Inconsistency with clickTrackingParams.
        # Currently we have a fixed ctp contained within context (from ytcfg)
        # and a ctp in root query for continuation.
        if ctp:
            query['clickTracking'] = {'clickTrackingParams': ctp}
        return query

    @classmethod
    def _extract_next_continuation_data(cls, renderer):
        """Return a continuation query built from the next/reload continuation data of *renderer*, or None."""
        next_continuation = try_get(
            renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                       lambda x: x['continuation']['reloadContinuationData']), dict)
        if not next_continuation:
            return
        continuation = next_continuation.get('continuation')
        if not continuation:
            return
        ctp = next_continuation.get('clickTrackingParams')
        return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation_ep_data(cls, continuation_ep: dict):
        """Return a continuation query built from a continuation endpoint dict, or None."""
        if isinstance(continuation_ep, dict):
            continuation = try_get(
                continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
            if not continuation:
                return
            ctp = continuation_ep.get('clickTrackingParams')
            return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation(cls, renderer):
        """
        Return the continuation query for *renderer*, or None.

        The renderer's own continuation data is tried first, then the
        continuationItemRenderer entries among its 'contents' and 'items'.
        """
        next_continuation = cls._extract_next_continuation_data(renderer)
        if next_continuation:
            return next_continuation

        contents = []
        for key in ('contents', 'items'):
            contents.extend(try_get(renderer, lambda x: x[key], list) or [])

        for content in contents:
            if not isinstance(content, dict):
                continue
            continuation_ep = try_get(
                content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                          lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
                dict)
            continuation = cls._extract_continuation_ep_data(continuation_ep)
            if continuation:
                return continuation

    @classmethod
    def _extract_alerts(cls, data):
        """Yield ``(type, message)`` for every alert in ``data['alerts']`` that has both a type and a text."""
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # Skip malformed entries instead of failing on `.get`
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = cls._get_text(alert, 'text')
                if message:
                    yield alert_type, message

    def _report_alerts(self, alerts, expected=True, fatal=True):
        """
        Report the ``(type, message)`` pairs in *alerts*.

        With *fatal*, alerts of type 'error' (case-insensitive) are errors:
        the last one is raised as ExtractorError (its `expected` flag set
        from *expected*) after all other alerts were reported as warnings.
        Without *fatal*, every alert is only reported as a warning.
        """
        errors = []
        warnings = []
        for alert_type, alert_message in alerts:
            if alert_type.lower() == 'error' and fatal:
                errors.append([alert_type, alert_message])
            else:
                warnings.append([alert_type, alert_message])

        for alert_type, alert_message in (warnings + errors[:-1]):
            self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
        if errors:
            raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

    def _extract_and_report_alerts(self, data, *args, **kwargs):
        """Extract the alerts of *data* and pass them to _report_alerts."""
        return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

    def _extract_badges(self, renderer: dict):
        """Return the set of lowercased metadata badge labels of *renderer*."""
        badges = set()
        for badge in try_get(renderer, lambda x: x['badges'], list) or []:
            label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
            if label:
                badges.add(label.lower())
        return badges

    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """
        Return the first non-empty text found in *data*, or None.

        Each path in *path_list* is looked up in turn (*data* itself when no
        path is given). For every object found, its 'simpleText' is
        preferred; otherwise the 'text' values of its 'runs' (or of the
        object itself if it is a list) are joined, limited to the first
        *max_runs* entries when that is set.
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # Paths without `...` or alternative keys are wrapped so the loop below sees one item
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                if not runs and isinstance(item, list):
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text

    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """
        Call the API with retries and return the response.

        The request is repeated up to the 'extractor_retries' param (default
        3) additional times on network errors other than HTTP 403/429, and
        when none of *check_get_keys* has a usable value in the response.
        Alerts contained in the response are reported.

        When *fatal* is false, failures are reported as warnings and None is
        returned instead of raising.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                        # Rewind after sniffing the first bytes so the full body can be parsed
                        e.cause.seek(0)
                        yt_error = try_get(
                            self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response

    @staticmethod
    def is_music_url(url):
        """Return True if *url* starts with http(s)://music.youtube.com/."""
        return re.match(r'https?://music\.youtube\.com/', url) is not None

    def _extract_video(self, renderer):
        """Return a 'url' result dict (handled by YoutubeIE) built from a video renderer."""
        video_id = renderer.get('videoId')
        title = self._get_text(renderer, 'title')
        description = self._get_text(renderer, 'descriptionSnippet')
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
        view_count_text = self._get_text(renderer, 'viewCountText') or ''
        # Only a leading run of digits and commas (whitespace removed) counts as the view count
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))

        uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
0c148415 869
360e1ca5 870class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 871 IE_DESC = 'YouTube.com'
bc2ca1bb 872 _INVIDIOUS_SITES = (
873 # invidious-redirect websites
874 r'(?:www\.)?redirect\.invidious\.io',
875 r'(?:(?:www|dev)\.)?invidio\.us',
876 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
877 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 878 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 879 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 880 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 881 # youtube-dl invidious instances list
882 r'(?:(?:www|no)\.)?invidiou\.sh',
883 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
884 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 885 r'(?:www\.)?invidious\.mastodon\.host',
886 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 887 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 888 r'(?:www\.)?invidious\.tinfoil-hat\.net',
889 r'(?:www\.)?invidious\.himiko\.cloud',
890 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 891 r'(?:www\.)?invidious\.tube',
892 r'(?:www\.)?invidiou\.site',
893 r'(?:www\.)?invidious\.site',
894 r'(?:www\.)?invidious\.xyz',
895 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 896 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 897 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 898 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 899 r'(?:www\.)?tube\.poal\.co',
900 r'(?:www\.)?tube\.connect\.cafe',
901 r'(?:www\.)?vid\.wxzm\.sx',
902 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 903 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 904 r'(?:www\.)?yewtu\.be',
905 r'(?:www\.)?yt\.elukerio\.org',
906 r'(?:www\.)?yt\.lelux\.fi',
907 r'(?:www\.)?invidious\.ggc-project\.de',
908 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 909 r'(?:www\.)?ytprivate\.com',
910 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 911 r'(?:www\.)?invidious\.toot\.koeln',
912 r'(?:www\.)?invidious\.fdn\.fr',
913 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 914 r'(?:www\.)?invidious\.namazso\.eu',
915 r'(?:www\.)?invidious\.silkky\.cloud',
916 r'(?:www\.)?invidious\.exonip\.de',
917 r'(?:www\.)?invidious\.riverside\.rocks',
918 r'(?:www\.)?invidious\.blamefran\.net',
919 r'(?:www\.)?invidious\.moomoo\.de',
920 r'(?:www\.)?ytb\.trom\.tf',
921 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 922 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
923 r'(?:www\.)?qklhadlycap4cnod\.onion',
924 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
925 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
926 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
927 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
928 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
929 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 930 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
931 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
932 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
933 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 934 )
cb7dfeea 935 _VALID_URL = r"""(?x)^
c5e8d7af 936 (
edb53e2d 937 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 938 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
939 (?:www\.)?deturl\.com/www\.youtube\.com|
940 (?:www\.)?pwnyoutube\.com|
941 (?:www\.)?hooktube\.com|
942 (?:www\.)?yourepeat\.com|
943 tube\.majestyc\.net|
944 %(invidious)s|
945 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
946 (?:.*?\#/)? # handle anchor (#/) redirect urls
947 (?: # the various things that can precede the ID:
ac7553d0 948 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 949 |(?: # or the v= param in all its forms
f7000f3a 950 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 951 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 952 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
953 v=
954 )
f4b05232 955 ))
cbaed4bb
S
956 |(?:
957 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
958 vid\.plus| # or vid.plus/xxxx
959 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 960 %(invidious)s
cbaed4bb 961 )/
edb53e2d 962 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 963 )
c5e8d7af 964 )? # all until now is optional -> you can pass the naked ID
201c1459 965 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 966 (?(1).+)? # if we found the ID, everything can follow
9297939e 967 (?:\#|$)""" % {
bc2ca1bb 968 'invidious': '|'.join(_INVIDIOUS_SITES),
969 }
e40c758c 970 _PLAYER_INFO_RE = (
cc2db878 971 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
972 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 973 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 974 )
2c62dc26 975 _formats = {
c2d3cb4c 976 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
977 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
978 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
979 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
980 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
981 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
982 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
983 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 984 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 985 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
986 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
987 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
988 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
989 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
990 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 991 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 992 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
993 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 994
995
996 # 3D videos
c2d3cb4c 997 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
998 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
999 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1000 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 1001 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1002 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1003 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 1004
96fb5605 1005 # Apple HTTP Live Streaming
11f12195 1006 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 1007 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1008 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1009 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1010 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1011 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 1012 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1013 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
1014
1015 # DASH mp4 video
d23028a8
S
1016 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1017 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1018 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1019 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1020 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 1021 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
1022 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1023 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1024 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1025 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1026 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1027 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1028
f6f1fc92 1029 # Dash mp4 audio
d23028a8
S
1030 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1031 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1032 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1033 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1034 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1035 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1036 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1037
1038 # Dash webm
d23028a8
S
1039 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1040 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1041 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1042 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1043 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1044 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1045 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1046 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1047 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1048 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1049 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1050 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1051 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1052 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1053 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1054 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1055 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1056 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1057 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1058 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1059 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1060 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1061
1062 # Dash webm audio
d23028a8
S
1063 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1064 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1065
0857baad 1066 # Dash webm audio with opus inside
d23028a8
S
1067 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1068 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1069 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1070
ce6b9a2d
PH
1071 # RTMP (unnamed)
1072 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1073
1074 # av01 video only formats sometimes served with "unknown" codecs
1075 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1076 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1077 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1078 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 1079 }
29f7c58a 1080 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1081
fd5c4aab
S
1082 _GEO_BYPASS = False
1083
78caa52a 1084 IE_NAME = 'youtube'
2eb88d95
PH
1085 _TESTS = [
1086 {
2d3d2997 1087 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1088 'info_dict': {
1089 'id': 'BaW_jenozKc',
1090 'ext': 'mp4',
3867038a 1091 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1092 'uploader': 'Philipp Hagemeister',
1093 'uploader_id': 'phihag',
ec85ded8 1094 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
1095 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1096 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1097 'upload_date': '20121002',
3867038a 1098 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 1099 'categories': ['Science & Technology'],
3867038a 1100 'tags': ['youtube-dl'],
556dbe7f 1101 'duration': 10,
dbdaaa23 1102 'view_count': int,
3e7c1224
PH
1103 'like_count': int,
1104 'dislike_count': int,
7c80519c 1105 'start_time': 1,
297a564b 1106 'end_time': 9,
2eb88d95 1107 }
0e853ca4 1108 },
fccd3771 1109 {
4bc3a23e
PH
1110 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1111 'note': 'Embed-only video (#1746)',
1112 'info_dict': {
1113 'id': 'yZIXLfi8CZQ',
1114 'ext': 'mp4',
1115 'upload_date': '20120608',
1116 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1117 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1118 'uploader': 'SET India',
94bfcd23 1119 'uploader_id': 'setindia',
ec85ded8 1120 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1121 'age_limit': 18,
545cc85d 1122 },
1123 'skip': 'Private video',
fccd3771 1124 },
11b56058 1125 {
8bdd16b4 1126 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1127 'note': 'Use the first video ID in the URL',
1128 'info_dict': {
1129 'id': 'BaW_jenozKc',
1130 'ext': 'mp4',
3867038a 1131 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1132 'uploader': 'Philipp Hagemeister',
1133 'uploader_id': 'phihag',
ec85ded8 1134 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1135 'upload_date': '20121002',
3867038a 1136 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1137 'categories': ['Science & Technology'],
3867038a 1138 'tags': ['youtube-dl'],
556dbe7f 1139 'duration': 10,
dbdaaa23 1140 'view_count': int,
11b56058
PM
1141 'like_count': int,
1142 'dislike_count': int,
34a7de29
S
1143 },
1144 'params': {
1145 'skip_download': True,
1146 },
11b56058 1147 },
dd27fd17 1148 {
2d3d2997 1149 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1150 'note': '256k DASH audio (format 141) via DASH manifest',
1151 'info_dict': {
1152 'id': 'a9LDPn-MO4I',
1153 'ext': 'm4a',
1154 'upload_date': '20121002',
1155 'uploader_id': '8KVIDEO',
ec85ded8 1156 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1157 'description': '',
1158 'uploader': '8KVIDEO',
1159 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1160 },
4bc3a23e
PH
1161 'params': {
1162 'youtube_include_dash_manifest': True,
1163 'format': '141',
4919603f 1164 },
de3c7fe0 1165 'skip': 'format 141 not served anymore',
dd27fd17 1166 },
8bdd16b4 1167 # DASH manifest with encrypted signature
1168 {
1169 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1170 'info_dict': {
1171 'id': 'IB3lcPjvWLA',
1172 'ext': 'm4a',
1173 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1174 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1175 'duration': 244,
1176 'uploader': 'AfrojackVEVO',
1177 'uploader_id': 'AfrojackVEVO',
1178 'upload_date': '20131011',
cc2db878 1179 'abr': 129.495,
8bdd16b4 1180 },
1181 'params': {
1182 'youtube_include_dash_manifest': True,
1183 'format': '141/bestaudio[ext=m4a]',
1184 },
1185 },
65c2fde2 1186 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1187 {
65c2fde2 1188 'note': 'Embed allowed age-gate video',
2d3d2997 1189 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1190 'info_dict': {
1191 'id': 'HtVdAasjOgU',
1192 'ext': 'mp4',
1193 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1194 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1195 'duration': 142,
c522adb1
JMF
1196 'uploader': 'The Witcher',
1197 'uploader_id': 'WitcherGame',
ec85ded8 1198 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1199 'upload_date': '20140605',
34952f09 1200 'age_limit': 18,
c522adb1
JMF
1201 },
1202 },
65c2fde2 1203 {
1204 'note': 'Age-gate video with embed allowed in public site',
1205 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1206 'info_dict': {
1207 'id': 'HsUATh_Nc2U',
1208 'ext': 'mp4',
1209 'title': 'Godzilla 2 (Official Video)',
1210 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1211 'upload_date': '20200408',
1212 'uploader_id': 'FlyingKitty900',
1213 'uploader': 'FlyingKitty',
1214 'age_limit': 18,
1215 },
1216 },
1217 {
1218 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1219 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1220 'info_dict': {
1221 'id': 'Tq92D6wQ1mg',
1222 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1223 'ext': 'mp4',
1224 'upload_date': '20191227',
65c2fde2 1225 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1226 'uploader': 'Projekt Melody',
1227 'description': 'md5:17eccca93a786d51bc67646756894066',
1228 'age_limit': 18,
1229 },
1230 },
1231 {
1232 'note': 'Non-Agegated non-embeddable video',
1233 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1234 'info_dict': {
1235 'id': 'MeJVWBSsPAY',
1236 'ext': 'mp4',
1237 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1238 'uploader': 'Herr Lurik',
1239 'uploader_id': 'st3in234',
1240 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1241 'upload_date': '20130730',
1242 },
1243 },
1244 {
1245 'note': 'Non-bypassable age-gated video',
1246 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1247 'only_matching': True,
1248 },
8bdd16b4 1249 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1250 # YouTube Red ad is not captured for creator
1251 {
1252 'url': '__2ABJjxzNo',
1253 'info_dict': {
1254 'id': '__2ABJjxzNo',
1255 'ext': 'mp4',
1256 'duration': 266,
1257 'upload_date': '20100430',
1258 'uploader_id': 'deadmau5',
1259 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1260 'creator': 'deadmau5',
1261 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1262 'uploader': 'deadmau5',
1263 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1264 'alt_title': 'Some Chords',
8bdd16b4 1265 },
1266 'expected_warnings': [
1267 'DASH manifest missing',
1268 ]
1269 },
067aa17e 1270 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1271 {
1272 'url': 'lqQg6PlCWgI',
1273 'info_dict': {
1274 'id': 'lqQg6PlCWgI',
1275 'ext': 'mp4',
556dbe7f 1276 'duration': 6085,
90227264 1277 'upload_date': '20150827',
cbe2bd91 1278 'uploader_id': 'olympic',
ec85ded8 1279 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1280 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1281 'uploader': 'Olympics',
cbe2bd91
PH
1282 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1283 },
1284 'params': {
1285 'skip_download': 'requires avconv',
e52a40ab 1286 }
cbe2bd91 1287 },
6271f1ca
PH
1288 # Non-square pixels
1289 {
1290 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1291 'info_dict': {
1292 'id': '_b-2C3KPAM0',
1293 'ext': 'mp4',
1294 'stretched_ratio': 16 / 9.,
556dbe7f 1295 'duration': 85,
6271f1ca
PH
1296 'upload_date': '20110310',
1297 'uploader_id': 'AllenMeow',
ec85ded8 1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1299 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1300 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1301 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1302 },
06b491eb
S
1303 },
1304 # url_encoded_fmt_stream_map is empty string
1305 {
1306 'url': 'qEJwOuvDf7I',
1307 'info_dict': {
1308 'id': 'qEJwOuvDf7I',
f57b7835 1309 'ext': 'webm',
06b491eb
S
1310 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1311 'description': '',
1312 'upload_date': '20150404',
1313 'uploader_id': 'spbelect',
1314 'uploader': 'Наблюдатели Петербурга',
1315 },
1316 'params': {
1317 'skip_download': 'requires avconv',
e323cf3f
S
1318 },
1319 'skip': 'This live event has ended.',
06b491eb 1320 },
067aa17e 1321 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1322 {
1323 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1324 'info_dict': {
1325 'id': 'FIl7x6_3R5Y',
eb6793ba 1326 'ext': 'webm',
da77d856
S
1327 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1328 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1329 'duration': 220,
da77d856
S
1330 'upload_date': '20150625',
1331 'uploader_id': 'dorappi2000',
ec85ded8 1332 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1333 'uploader': 'dorappi2000',
eb6793ba 1334 'formats': 'mincount:31',
da77d856 1335 },
eb6793ba 1336 'skip': 'not actual anymore',
2ee8f5d8 1337 },
8a1a26ce
YCH
1338 # DASH manifest with segment_list
1339 {
1340 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1341 'md5': '8ce563a1d667b599d21064e982ab9e31',
1342 'info_dict': {
1343 'id': 'CsmdDsKjzN8',
1344 'ext': 'mp4',
17ee98e1 1345 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1346 'uploader': 'Airtek',
1347 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1348 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1349 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1350 },
1351 'params': {
1352 'youtube_include_dash_manifest': True,
1353 'format': '135', # bestvideo
be49068d
S
1354 },
1355 'skip': 'This live event has ended.',
2ee8f5d8 1356 },
cf7e015f
S
1357 {
1358 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1359 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1360 'info_dict': {
545cc85d 1361 'id': 'jvGDaLqkpTg',
1362 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1363 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1364 },
1365 'playlist': [{
1366 'info_dict': {
545cc85d 1367 'id': 'jvGDaLqkpTg',
cf7e015f 1368 'ext': 'mp4',
545cc85d 1369 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1370 'description': 'md5:e03b909557865076822aa169218d6a5d',
1371 'duration': 10643,
1372 'upload_date': '20161111',
1373 'uploader': 'Team PGP',
1374 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1375 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1376 },
1377 }, {
1378 'info_dict': {
545cc85d 1379 'id': '3AKt1R1aDnw',
cf7e015f 1380 'ext': 'mp4',
545cc85d 1381 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1382 'description': 'md5:e03b909557865076822aa169218d6a5d',
1383 'duration': 10991,
1384 'upload_date': '20161111',
1385 'uploader': 'Team PGP',
1386 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1387 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1388 },
1389 }, {
1390 'info_dict': {
545cc85d 1391 'id': 'RtAMM00gpVc',
cf7e015f 1392 'ext': 'mp4',
545cc85d 1393 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1394 'description': 'md5:e03b909557865076822aa169218d6a5d',
1395 'duration': 10995,
1396 'upload_date': '20161111',
1397 'uploader': 'Team PGP',
1398 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1399 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1400 },
1401 }, {
1402 'info_dict': {
545cc85d 1403 'id': '6N2fdlP3C5U',
cf7e015f 1404 'ext': 'mp4',
545cc85d 1405 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1406 'description': 'md5:e03b909557865076822aa169218d6a5d',
1407 'duration': 10990,
1408 'upload_date': '20161111',
1409 'uploader': 'Team PGP',
1410 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1411 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1412 },
1413 }],
1414 'params': {
1415 'skip_download': True,
1416 },
65c2fde2 1417 'skip': 'Not multifeed anymore',
cbaed4bb 1418 },
f9f49d87 1419 {
067aa17e 1420 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1421 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1422 'info_dict': {
1423 'id': 'gVfLd0zydlo',
1424 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1425 },
1426 'playlist_count': 2,
be49068d 1427 'skip': 'Not multifeed anymore',
f9f49d87 1428 },
cbaed4bb 1429 {
2d3d2997 1430 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1431 'only_matching': True,
0e49d9a6 1432 },
6d4fc66b 1433 {
2d3d2997 1434 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1435 'only_matching': True,
1436 },
0e49d9a6 1437 {
067aa17e 1438 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1439 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1440 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1441 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1442 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1443 'info_dict': {
1444 'id': 'lsguqyKfVQg',
1445 'ext': 'mp4',
1446 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1447 'alt_title': 'Dark Walk',
0e49d9a6 1448 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1449 'duration': 133,
0e49d9a6
LL
1450 'upload_date': '20151119',
1451 'uploader_id': 'IronSoulElf',
ec85ded8 1452 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1453 'uploader': 'IronSoulElf',
11f9be09 1454 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1455 'track': 'Dark Walk',
1456 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1457 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1458 },
1459 'params': {
1460 'skip_download': True,
1461 },
1462 },
61f92af1 1463 {
067aa17e 1464 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1465 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1466 'only_matching': True,
1467 },
313dfc45
LL
1468 {
1469 # Video with yt:stretch=17:0
1470 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1471 'info_dict': {
1472 'id': 'Q39EVAstoRM',
1473 'ext': 'mp4',
1474 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1475 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1476 'upload_date': '20151107',
1477 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1478 'uploader': 'CH GAMER DROID',
1479 },
1480 'params': {
1481 'skip_download': True,
1482 },
be49068d 1483 'skip': 'This video does not exist.',
313dfc45 1484 },
201c1459 1485 {
1486 # Video with incomplete 'yt:stretch=16:'
1487 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1488 'only_matching': True,
1489 },
7caf9830
S
1490 {
1491 # Video licensed under Creative Commons
1492 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1493 'info_dict': {
1494 'id': 'M4gD1WSo5mA',
1495 'ext': 'mp4',
1496 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1497 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1498 'duration': 721,
7caf9830
S
1499 'upload_date': '20150127',
1500 'uploader_id': 'BerkmanCenter',
ec85ded8 1501 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1502 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1503 'license': 'Creative Commons Attribution license (reuse allowed)',
1504 },
1505 'params': {
1506 'skip_download': True,
1507 },
1508 },
fd050249
S
1509 {
1510 # Channel-like uploader_url
1511 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1512 'info_dict': {
1513 'id': 'eQcmzGIKrzg',
1514 'ext': 'mp4',
1515 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1516 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1517 'duration': 4060,
fd050249 1518 'upload_date': '20151119',
eb6793ba 1519 'uploader': 'Bernie Sanders',
fd050249 1520 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1521 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1522 'license': 'Creative Commons Attribution license (reuse allowed)',
1523 },
1524 'params': {
1525 'skip_download': True,
1526 },
1527 },
040ac686
S
1528 {
1529 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1530 'only_matching': True,
7f29cf54
S
1531 },
1532 {
067aa17e 1533 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1534 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1535 'only_matching': True,
6496ccb4
S
1536 },
1537 {
1538 # Rental video preview
1539 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1540 'info_dict': {
1541 'id': 'uGpuVWrhIzE',
1542 'ext': 'mp4',
1543 'title': 'Piku - Trailer',
1544 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1545 'upload_date': '20150811',
1546 'uploader': 'FlixMatrix',
1547 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1548 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1549 'license': 'Standard YouTube License',
1550 },
1551 'params': {
1552 'skip_download': True,
1553 },
eb6793ba 1554 'skip': 'This video is not available.',
022a5d66 1555 },
12afdc2a
S
1556 {
1557 # YouTube Red video with episode data
1558 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1559 'info_dict': {
1560 'id': 'iqKdEhx-dD4',
1561 'ext': 'mp4',
1562 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1563 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1564 'duration': 2085,
12afdc2a
S
1565 'upload_date': '20170118',
1566 'uploader': 'Vsauce',
1567 'uploader_id': 'Vsauce',
1568 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1569 'series': 'Mind Field',
1570 'season_number': 1,
1571 'episode_number': 1,
1572 },
1573 'params': {
1574 'skip_download': True,
1575 },
1576 'expected_warnings': [
1577 'Skipping DASH manifest',
1578 ],
1579 },
c7121fa7
S
1580 {
1581 # The following content has been identified by the YouTube community
1582 # as inappropriate or offensive to some audiences.
1583 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1584 'info_dict': {
1585 'id': '6SJNVb0GnPI',
1586 'ext': 'mp4',
1587 'title': 'Race Differences in Intelligence',
1588 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1589 'duration': 965,
1590 'upload_date': '20140124',
1591 'uploader': 'New Century Foundation',
1592 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1593 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1594 },
1595 'params': {
1596 'skip_download': True,
1597 },
545cc85d 1598 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1599 },
022a5d66
S
1600 {
1601 # itag 212
1602 'url': '1t24XAntNCY',
1603 'only_matching': True,
fd5c4aab
S
1604 },
1605 {
1606 # geo restricted to JP
1607 'url': 'sJL6WA-aGkQ',
1608 'only_matching': True,
1609 },
cd5a74a2
S
1610 {
1611 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1612 'only_matching': True,
1613 },
bc2ca1bb 1614 {
1615 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1616 'only_matching': True,
1617 },
1618 {
1619 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1620 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1621 'only_matching': True,
1622 },
825cd268
RA
1623 {
1624 # DRM protected
1625 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1626 'only_matching': True,
4fe54c12
S
1627 },
1628 {
1629 # Video with unsupported adaptive stream type formats
1630 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1631 'info_dict': {
1632 'id': 'Z4Vy8R84T1U',
1633 'ext': 'mp4',
1634 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1635 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1636 'duration': 433,
1637 'upload_date': '20130923',
1638 'uploader': 'Amelia Putri Harwita',
1639 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1640 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1641 'formats': 'maxcount:10',
1642 },
1643 'params': {
1644 'skip_download': True,
1645 'youtube_include_dash_manifest': False,
1646 },
5429d6a9 1647 'skip': 'not actual anymore',
5caabd3c 1648 },
1649 {
822b9d9c 1650 # Youtube Music Auto-generated description
5caabd3c 1651 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1652 'info_dict': {
1653 'id': 'MgNrAu2pzNs',
1654 'ext': 'mp4',
1655 'title': 'Voyeur Girl',
1656 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1657 'upload_date': '20190312',
5429d6a9
S
1658 'uploader': 'Stephen - Topic',
1659 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1660 'artist': 'Stephen',
1661 'track': 'Voyeur Girl',
1662 'album': 'it\'s too much love to know my dear',
1663 'release_date': '20190313',
1664 'release_year': 2019,
1665 },
1666 'params': {
1667 'skip_download': True,
1668 },
1669 },
66b48727
RA
1670 {
1671 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1672 'only_matching': True,
1673 },
011e75e6
S
1674 {
1675 # invalid -> valid video id redirection
1676 'url': 'DJztXj2GPfl',
1677 'info_dict': {
1678 'id': 'DJztXj2GPfk',
1679 'ext': 'mp4',
1680 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1681 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1682 'upload_date': '20090125',
1683 'uploader': 'Prochorowka',
1684 'uploader_id': 'Prochorowka',
1685 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1686 'artist': 'Panjabi MC',
1687 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1688 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1689 },
1690 'params': {
1691 'skip_download': True,
1692 },
545cc85d 1693 'skip': 'Video unavailable',
ea74e00b
DP
1694 },
1695 {
1696 # empty description results in an empty string
1697 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1698 'info_dict': {
1699 'id': 'x41yOUIvK2k',
1700 'ext': 'mp4',
1701 'title': 'IMG 3456',
1702 'description': '',
1703 'upload_date': '20170613',
1704 'uploader_id': 'ElevageOrVert',
1705 'uploader': 'ElevageOrVert',
1706 },
1707 'params': {
1708 'skip_download': True,
1709 },
1710 },
a0566bbf 1711 {
29f7c58a 1712 # with '};' inside yt initial data (see [1])
1713 # see [2] for an example with '};' inside ytInitialPlayerResponse
1714 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1715 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1716 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1717 'info_dict': {
1718 'id': 'CHqg6qOn4no',
1719 'ext': 'mp4',
1720 'title': 'Part 77 Sort a list of simple types in c#',
1721 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1722 'upload_date': '20130831',
1723 'uploader_id': 'kudvenkat',
1724 'uploader': 'kudvenkat',
1725 },
1726 'params': {
1727 'skip_download': True,
1728 },
1729 },
29f7c58a 1730 {
1731 # another example of '};' in ytInitialData
1732 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1733 'only_matching': True,
1734 },
1735 {
1736 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1737 'only_matching': True,
1738 },
545cc85d 1739 {
cc2db878 1740 # https://github.com/ytdl-org/youtube-dl/pull/28094
1741 'url': 'OtqTfy26tG0',
1742 'info_dict': {
1743 'id': 'OtqTfy26tG0',
1744 'ext': 'mp4',
1745 'title': 'Burn Out',
1746 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1747 'upload_date': '20141120',
1748 'uploader': 'The Cinematic Orchestra - Topic',
1749 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1750 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1751 'artist': 'The Cinematic Orchestra',
1752 'track': 'Burn Out',
1753 'album': 'Every Day',
1754 'release_data': None,
1755 'release_year': None,
1756 },
1757 'params': {
1758 'skip_download': True,
1759 },
545cc85d 1760 },
bc2ca1bb 1761 {
1762 # controversial video, only works with bpctr when authenticated with cookies
1763 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1764 'only_matching': True,
1765 },
a1a7907b 1766 {
1767 # controversial video, requires bpctr/contentCheckOk
1768 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1769 'info_dict': {
1770 'id': 'SZJvDhaSDnc',
1771 'ext': 'mp4',
1772 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1773 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1774 'uploader': 'CBS This Morning',
11f9be09 1775 'uploader_id': 'CBSThisMorning',
a1a7907b 1776 'upload_date': '20140716',
1777 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1778 }
1779 },
f7ad7160 1780 {
1781 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1782 'url': 'cBvYw8_A0vQ',
1783 'info_dict': {
1784 'id': 'cBvYw8_A0vQ',
1785 'ext': 'mp4',
1786 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1787 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1788 'upload_date': '20201120',
1789 'uploader': 'Walk around Japan',
1790 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1791 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1792 },
1793 'params': {
1794 'skip_download': True,
1795 },
0fb983f6 1796 }, {
1797 # Has multiple audio streams
1798 'url': 'WaOKSUlf4TM',
1799 'only_matching': True
9297939e 1800 }, {
1801 # Requires Premium: has format 141 when requested using YTM url
1802 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1803 'only_matching': True
1804 }, {
120916da 1805 # multiple subtitles with same lang_code
1806 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1807 'only_matching': True,
109dd3b2 1808 }, {
1809 # Force use android client fallback
1810 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1811 'info_dict': {
1812 'id': 'YOelRv7fMxY',
11f9be09 1813 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1814 'ext': '3gp',
1815 'upload_date': '20210624',
1816 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1817 'uploader': 'colinfurze',
11f9be09 1818 'uploader_id': 'colinfurze',
109dd3b2 1819 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1820 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1821 },
1822 'params': {
1823 'format': '17', # 3gp format available on android
1824 'extractor_args': {'youtube': {'player_client': ['android']}},
1825 },
120916da 1826 },
109dd3b2 1827 {
1828 # Skip download of additional client configs (remix client config in this case)
1829 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1830 'only_matching': True,
1831 'params': {
1832 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1833 },
1834 }
2eb88d95
PH
1835 ]
1836
@classmethod
def suitable(cls, url):
    """Reject URLs carrying a non-empty ``list`` query parameter.

    Any other URL is judged by the parent class's ``suitable``.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    has_list = bool(list_values[0])
    return False if has_list else super(YoutubeIE, cls).suitable(url)
1846
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent and create the two empty caches.

    ``_code_cache`` is keyed by player id (filled by ``_load_player``);
    ``_player_cache`` is keyed by ``(player_url, signature layout)``
    (filled by ``_decrypt_signature``).
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    self._code_cache, self._player_cache = {}, {}
e0df6211 1851
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return an absolute player JS URL, or None if none can be found.

    ``ytcfg['PLAYER_JS_URL']`` is tried first; if that yields nothing and a
    webpage is given, the page is searched for a ``PLAYER_JS_URL``/``jsUrl``
    JSON entry. Scheme-relative and path-only URLs are made absolute.
    """
    url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if webpage and not url:
        url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not url:
        return None
    # Scheme-relative URL: only the scheme is missing
    if url.startswith('//'):
        return 'https:' + url
    # Already absolute
    if re.match(r'https?://', url):
        return url
    # Anything else is resolved against the YouTube origin
    return compat_urlparse.urljoin('https://www.youtube.com', url)
1866
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the dot-joined lengths of the dot-separated parts of a signature."""
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1870
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern found in the URL.

    Raises ExtractorError when no pattern matches.
    """
    # Lazy generator: patterns are tried in order and searching stops at the first hit
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    found = next((m for m in attempts if m), None)
    if found is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return found.group('id')
e40c758c 1880
109dd3b2 1881 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1882 player_id = self._extract_player_info(player_url)
1883 if player_id not in self._code_cache:
1884 self._code_cache[player_id] = self._download_webpage(
1885 player_url, video_id, fatal=fatal,
1886 note='Downloading player ' + player_id,
1887 errnote='Download of %s failed' % player_url)
1888 return player_id in self._code_cache
1889
e40c758c 1890 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1891 player_id = self._extract_player_info(player_url)
e0df6211 1892
c4417ddb 1893 # Read from filesystem cache
545cc85d 1894 func_id = 'js_%s_%s' % (
1895 player_id, self._signature_cache_id(example_sig))
c4417ddb 1896 assert os.path.basename(func_id) == func_id
a0e07d31 1897
69ea8ca4 1898 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1899 if cache_spec is not None:
78caa52a 1900 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1901
109dd3b2 1902 if self._load_player(video_id, player_url):
1903 code = self._code_cache[player_id]
1904 res = self._parse_sig_js(code)
e0df6211 1905
109dd3b2 1906 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1907 cache_res = res(test_string)
1908 cache_spec = [ord(c) for c in cache_res]
83799698 1909
109dd3b2 1910 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1911 return res
83799698 1912
60064c53 1913 def _print_sig_code(self, func, example_sig):
edf3e38e
PH
1914 def gen_sig_code(idxs):
1915 def _genslice(start, end, step):
78caa52a 1916 starts = '' if start == 0 else str(start)
8bcc8756 1917 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 1918 steps = '' if step == 1 else (':%d' % step)
78caa52a 1919 return 's[%s%s%s]' % (starts, ends, steps)
edf3e38e
PH
1920
1921 step = None
7af808a5
PH
1922 # Quelch pyflakes warnings - start will be set when step is set
1923 start = '(Never used)'
edf3e38e
PH
1924 for i, prev in zip(idxs[1:], idxs[:-1]):
1925 if step is not None:
1926 if i - prev == step:
1927 continue
1928 yield _genslice(start, prev, step)
1929 step = None
1930 continue
1931 if i - prev in [-1, 1]:
1932 step = i - prev
1933 start = prev
1934 continue
1935 else:
78caa52a 1936 yield 's[%d]' % prev
edf3e38e 1937 if step is None:
78caa52a 1938 yield 's[%d]' % i
edf3e38e
PH
1939 else:
1940 yield _genslice(start, i, step)
1941
78caa52a 1942 test_string = ''.join(map(compat_chr, range(len(example_sig))))
c705320f 1943 cache_res = func(test_string)
edf3e38e 1944 cache_spec = [ord(c) for c in cache_res]
78caa52a 1945 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53
PH
1946 signature_id_tuple = '(%s)' % (
1947 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
69ea8ca4 1948 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 1949 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 1950 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1951
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in player JS and return a Python wrapper for it.

    The function name is taken from the ``sig`` group of the first pattern
    that matches ``jscode``; the function is then extracted with
    JSInterpreter. The returned callable takes the signature string as its
    only argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    sig_func_name = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    js_function = JSInterpreter(jscode).extract_function(sig_func_name)

    def run_signature(s):
        # The extracted function is called with its argument list
        return js_function([s])

    return run_signature
1975
545cc85d 1976 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1977 """Turn the encrypted s field into a working signature"""
6b37f0be 1978
c8bf86d5 1979 if player_url is None:
69ea8ca4 1980 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1981
c8bf86d5 1982 try:
62af3a0e 1983 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1984 if player_id not in self._player_cache:
1985 func = self._extract_signature_function(
60064c53 1986 video_id, player_url, s
c8bf86d5
PH
1987 )
1988 self._player_cache[player_id] = func
1989 func = self._player_cache[player_id]
a06916d9 1990 if self.get_param('youtube_print_sig_code'):
60064c53 1991 self._print_sig_code(func, s)
c8bf86d5
PH
1992 return func(s)
1993 except Exception as e:
1994 tb = traceback.format_exc()
1995 raise ExtractorError(
78caa52a 1996 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1997
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ``ytcfg['STS']`` is used when it is truthy; otherwise the player JS is
    loaded and searched for a 5-digit ``signatureTimestamp``/``sts`` value.
    Without a player URL this raises ExtractorError if ``fatal`` is set,
    else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return
    if not self._load_player(video_id, player_url, fatal=fatal):
        return sts
    js_code = self._code_cache[self._extract_player_info(player_url)]
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', js_code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2022
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL from the player responses.

    The first valid ``videostatsPlaybackUrl`` base URL is used; ``ver`` and
    a random ``cpn`` are set in its query before it is fetched (non-fatal).
    A warning is reported when no such URL is present.
    """
    playback_url = traverse_obj(
        player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]

    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2048
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in ``webpage``.

    Three sources are scanned, in this order: embedded player URLs,
    lazyYT ``data-youtube-id`` values, and ``data-video_id`` values of the
    Wordpress "YouTube Video Importer" plugin. Returns a list of strings.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    found = [unescapeHTML(m.group('url')) for m in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    for lazy_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        found.append(unescapeHTML(lazy_id))

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    # findall yields one tuple per match; the video id is the last group
    found.extend(groups[-1] for groups in re.findall(importer_re, webpage))

    return found
2080
@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``YoutubeIE._extract_urls(webpage)``, or None if it is empty."""
    candidates = YoutubeIE._extract_urls(webpage)
    if not candidates:
        return None
    return candidates[0]
2085
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of ``_VALID_URL`` matched against ``url``.

    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af 2092
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the ``chapteredPlayerBarRenderer`` chapter list in ``data``.

    Start times come from ``timeRangeStartMillis`` (scaled by 1000) and
    titles from ``title.simpleText``; assembly is delegated to
    ``_extract_chapters``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    raw_chapters = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        raw_chapters,
        chapter_time=start_seconds,
        chapter_title=title_of,
        duration=duration)
2107
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the ``macroMarkersListRenderer`` engagement panels in ``data``.

    Each panel's contents are tried in order; the first non-empty chapter
    list wins. Returns an empty list when no panel yields chapters.
    """
    panels = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in panels:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2123
2124 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2125 chapters = []
7c365c21 2126 last_chapter = {'start_time': 0}
2127 for idx, chapter in enumerate(chapter_list or []):
2128 title = chapter_title(chapter)
84213ea8
S
2129 start_time = chapter_time(chapter)
2130 if start_time is None:
2131 continue
7c365c21 2132 last_chapter['end_time'] = start_time
2133 if start_time < last_chapter['start_time']:
2134 if idx == 1:
2135 chapters.pop()
2136 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2137 else:
2138 self.report_warning(f'Invalid start time for chapter "{title}"')
2139 continue
2140 last_chapter = {'start_time': start_time, 'title': title}
2141 chapters.append(last_chapter)
2142 last_chapter['end_time'] = duration
84213ea8
S
2143 return chapters
2144
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find a JSON blob in ``webpage`` with ``regex`` and parse it (non-fatal).

    The regex is first tried with ``_YT_INITIAL_BOUNDARY_RE`` appended, then
    on its own; when neither matches, ``'{}'`` is parsed instead.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 2149
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the result of ``datetime_from_str('now-<X><units>')``, or None
    when the text has fewer than three space-separated words or the
    conversion raises ValueError.
    """
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    amount, unit = words[0], words[1]
    try:
        return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
    except ValueError:
        return None
d92f5d5a 2162
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a ``commentRenderer`` dict into a flat comment dict.

    Returns None when the renderer has no ``commentId``. ``timestamp`` is
    None when the published-time text cannot be parsed. ``parent`` is the
    id of the parent comment; 'root' is stored when it is not given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    time_text_dt = self.parse_time_text(time_text)
    # Must be bound even if parsing failed; it is read unconditionally below
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    # Vote count falls back to 0 when neither field is present/parsable
    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    # The last thumbnail in the list is used
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2200
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """Generator over the comments reachable from ``root_continuation_data``.

    Yields comment dicts produced by ``_extract_comment``; when a comment
    header with a count is found on the first top-level page, that count
    (a number) is yielded as well. Replies are fetched by recursing with
    ``parent`` set to the parent comment's id and the same
    ``comment_counts`` list, which is mutated in place:
    ``[comments so far, estimated total, current thread number]``.
    """

    def extract_header(contents):
        # Reads the expected comment count and the continuation of the
        # selected sort order from the comments header.
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        # Yields each comment in `contents`, followed by its replies.
        if not parent:
            # Top-level page: restart the thread counter
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # NOTE(review): `dict` sits inside the getter tuple here, so it is passed
            # as a second getter rather than as expected_type - confirm this is intended.
            comment_renderer = try_get(
                comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                content, (lambda x: x['commentRenderer'], dict))

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                # Recurse with this comment as parent; counters are shared
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        # Replies were not requested
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    visitor_data = None
    # Only the top-level call processes the comments header
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        # Progress note shown for this request
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Keep the previous visitor data if the response carries none
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        # Reset; the loop ends on the next iteration unless a new continuation is found below
        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # `count` ends up as the index of the last entry (0 for zero or one entries)
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items)):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2371
2d6659b9 2372 @staticmethod
2373 def _generate_comment_continuation(video_id):
2374 """
2375 Generates initial comment section continuation token from given video id
2376 """
2377 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2378 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2379 new_continuation_intlist = list(itertools.chain.from_iterable(
2380 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2381 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2382
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Collects the comments yielded for every known comment renderer found
    in `contents`, stopping early once the "max_comments" extractor
    argument is reached or when interrupted by the user.
    Returns a dict with the keys "comments" and "comment_count".
    """
    known_entry_comment_renderers = ('itemSectionRenderer',)
    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    # Force English regardless of account setting to prevent parsing issues
    # See: https://github.com/yt-dlp/yt-dlp/issues/532
    ytcfg = copy.deepcopy(ytcfg)
    client_context = traverse_obj(
        ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})
    client_context['hl'] = 'en'

    def _real_comment_extract(section_list):
        if not isinstance(section_list, list):
            return
        for section in section_list:
            for renderer_name, renderer in section.items():
                if renderer_name in known_entry_comment_renderers:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    # Only the first known renderer of each section is used
                    break

    collected = []
    estimated_total = 0
    try:
        for item in _real_comment_extract(contents):
            if len(collected) >= max_comments:
                break
            if isinstance(item, int):
                # An int is the estimated total number of comments, not a comment
                estimated_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(collected), estimated_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
2420
109dd3b2 2421 @staticmethod
2422 def _generate_player_context(sts=None):
2423 context = {
2424 'html5Preference': 'HTML5_PREF_WANTS',
2425 }
2426 if sts is not None:
2427 context['signatureTimestamp'] = sts
2428 return {
2429 'playbackContext': {
2430 'contentPlaybackContext': context
a1a7907b 2431 },
2fd226f6 2432 'contentCheckOk': True,
2433 'racyCheckOk': True
109dd3b2 2434 }
2435
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """
    Tell whether the player response indicates an age-gated video

    Returns True when playabilityStatus carries a desktopLegacyAgeGateReason,
    or when its status or reason contains one of the known age-gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Statuses are upper-case (e.g. AGE_VERIFICATION_REQUIRED) while the markers
    # above are lower-case, so the comparison has to be case-insensitive.
    # Non-string values are ignored since they cannot contain a marker
    return any(
        expected in reason.lower()
        for expected in AGE_GATE_REASONS
        for reason in reasons if isinstance(reason, str))
2447
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the player response is UNPLAYABLE"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 2451
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
    """
    Download the player API JSON of the video for the given client

    The request is fatal; a falsy response is returned as None.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    # A missing signature timestamp is tolerated; it is then left out of the query
    sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        player_ytcfg, identity_token, syncid,
        default_client=client, session_index=session_index)

    yt_query = {'videoId': video_id, **self._generate_player_context(sts)}
    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None
2469
def _get_requested_clients(self, url, smuggled_data):
    """
    Determine which innertube clients to request player responses from

    The clients come from the "player_client" extractor argument ("all"
    selects every public client, by descending priority); unsupported names
    are skipped with a warning. Defaults to android and web. For music URLs
    the existing "_music" variants of the chosen clients are added as well.
    Returns the clients with duplicates removed, in order of first appearance.
    """
    # Clients whose name starts with an underscore are not user-selectable
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = ['android', 'web']

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions as a list first so that the list
        # being extended is not iterated while it grows
        requested_clients.extend([
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS])

    return orderedSet(requested_clients)
cf7e015f 2490
c0bc527b
M
def _extract_player_ytcfg(self, client, video_id):
    """
    Download the config page of the client and extract the ytcfg from it

    Only web_music and web_embedded have a config page; an empty dict
    is returned for any other client or when nothing could be extracted.
    """
    if client == 'web_music':
        config_url = 'https://music.youtube.com'
    elif client == 'web_embedded':
        config_url = f'https://www.youtube.com/embed/{video_id}?html5=1'
    else:
        return {}
    # The download is non-fatal
    config_page = self._download_webpage(
        config_url, video_id, fatal=False, note=f'Downloading {client} config')
    return self.extract_ytcfg(video_id, config_page) or {}
2500
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
    """
    Yield the player responses of the video for each of the given clients

    Further clients (age-gate/creator variants) are queued on the fly when a
    response turns out to be age-gated. Errors of individual clients are
    reported as warnings; the last one is raised if nothing was yielded.
    """
    initial_pr = self._extract_yt_initial_variable(
        webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
        video_id, 'initial player response') if webpage else None

    original_clients = clients
    # Used as a stack: popping from the end walks the clients in their original order
    pending_clients = clients[::-1]

    def append_client(client_name):
        if client_name not in INNERTUBE_CLIENTS or client_name in original_clients:
            return
        pending_clients.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    yielded_pr = False
    if initial_pr:
        yielded_pr = True
        yield {**initial_pr, 'streamingData': None}

    last_error = None
    while pending_clients:
        client = pending_clients.pop()
        is_web = client == 'web'
        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        try:
            if is_web and initial_pr:
                # The webpage already contains the response of the web client
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    identity_token, player_url, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; the previous one is downgraded to a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            yielded_pr = True
            yield pr

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header():
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if not last_error:
        return
    if not yielded_pr:
        raise last_error
    self.report_warning(last_error)
11f9be09 2556
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """
    Yield the format dicts found in the streamingData of the player responses

    The entries of "formats"/"adaptiveFormats" are yielded first, followed by
    the formats of the HLS and DASH manifests (unless these are skipped).
    Entries with targetDurationSec or drmFamilies, OTF streams, formats whose
    signature cannot be decrypted and duplicates are left out.

    @param streaming_data  list of streamingData dicts
    @param video_id        video id
    @param player_url      URL of the player, needed to decrypt signatures;
                           ciphered formats are skipped without it
    @param is_live         whether the video is live; the DASH manifest of a live video
                           is only read if the "include_live_dash" argument is given
    """
    itags, stream_ids = [], []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # `or ''` rather than a .get() default: the key may be present with a None value
        audio_track_id = audio_track.get('id') or ''
        stream_id = '%s.%s' % (itag or '', audio_track_id)
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher') or '')
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            # quality may still be None here, so it must not be dereferenced directly
            'format_note': ', '.join(filter(None, (
                audio_track.get('displayName'),
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', '')))),
            'fps': int_or_none(fmt.get('fps')),
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track_id.split('.')[0],
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = (
        (not is_live or self._configuration_arg('include_live_dash'))
        and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    def guess_quality(f):
        # Reuse the quality seen above for the same itag, or else for the same height
        for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
            if val in qdict:
                return q(qdict[val])
        return -1

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag in itags:
                    continue
                if itag:
                    f['format_id'] = itag
                    itags.append(itag)
                f['quality'] = guess_quality(f)
                yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag:
                    itags.append(itag)
                f['quality'] = guess_quality(f)
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                yield f
2693
2694 def _real_extract(self, url):
2695 url, smuggled_data = unsmuggle_url(url, {})
2696 video_id = self._match_id(url)
2697
2698 base_url = self.http_scheme() + '//www.youtube.com/'
2699 webpage_url = base_url + 'watch?v=' + video_id
2700 webpage = self._download_webpage(
2701 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2702
2703 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2704 player_url = self._extract_player_url(master_ytcfg, webpage)
2705 identity_token = self._extract_identity_token(webpage, video_id)
2706
2707 player_responses = list(self._extract_player_responses(
2708 self._get_requested_clients(url, smuggled_data),
2709 video_id, webpage, master_ytcfg, player_url, identity_token))
2710
352d63fd 2711 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
11f9be09 2712
2713 playability_statuses = traverse_obj(
2714 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2715
2716 trailer_video_id = get_first(
2717 playability_statuses,
2718 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2719 expected_type=str)
2720 if trailer_video_id:
2721 return self.url_result(
2722 trailer_video_id, self.ie_key(), trailer_video_id)
2723
2724 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2725 if webpage else (lambda x: None))
2726
2727 video_details = traverse_obj(
2728 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2729 microformats = traverse_obj(
2730 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2731 expected_type=dict, default=[])
2732 video_title = (
2733 get_first(video_details, 'title')
2734 or self._get_text(microformats, (..., 'title'))
2735 or search_meta(['og:title', 'twitter:title', 'title']))
2736 video_description = get_first(video_details, 'shortDescription')
2737
2738 if not smuggled_data.get('force_singlefeed', False):
2739 if not self.get_param('noplaylist'):
2740 multifeed_metadata_list = get_first(
2741 player_responses,
2742 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2743 expected_type=str)
2744 if multifeed_metadata_list:
2745 entries = []
2746 feed_ids = []
2747 for feed in multifeed_metadata_list.split(','):
2748 # Unquote should take place before split on comma (,) since textual
2749 # fields may contain comma as well (see
2750 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2751 feed_data = compat_parse_qs(
2752 compat_urllib_parse_unquote_plus(feed))
2753
2754 def feed_entry(name):
2755 return try_get(
2756 feed_data, lambda x: x[name][0], compat_str)
2757
2758 feed_id = feed_entry('id')
2759 if not feed_id:
2760 continue
2761 feed_title = feed_entry('title')
2762 title = video_title
2763 if feed_title:
2764 title += ' (%s)' % feed_title
2765 entries.append({
2766 '_type': 'url_transparent',
2767 'ie_key': 'Youtube',
2768 'url': smuggle_url(
2769 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2770 {'force_singlefeed': True}),
2771 'title': title,
2772 })
2773 feed_ids.append(feed_id)
2774 self.to_screen(
2775 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2776 % (', '.join(feed_ids), video_id))
2777 return self.playlist_result(
2778 entries, video_id, video_title, video_description)
2779 else:
2780 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2781
7ea65411 2782 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
11f9be09 2783 is_live = get_first(video_details, 'isLive')
7ea65411 2784 if is_live is None:
2785 is_live = get_first(live_broadcast_details, 'isLiveNow')
11f9be09 2786
2787 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2788 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
bf1317d2 2789
545cc85d 2790 if not formats:
11f9be09 2791 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
b7da73eb 2792 self.raise_no_formats(
545cc85d 2793 'This video is DRM protected.', expected=True)
11f9be09 2794 pemr = get_first(
2795 playability_statuses,
2796 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2797 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2798 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 2799 if subreason:
545cc85d 2800 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 2801 countries = get_first(microformats, 'availableCountries')
545cc85d 2802 if not countries:
2803 regions_allowed = search_meta('regionsAllowed')
2804 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2805 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 2806 reason += f'. {subreason}'
545cc85d 2807 if reason:
b7da73eb 2808 self.raise_no_formats(reason, expected=True)
bf1317d2 2809
11f9be09 2810 for f in formats:
2a9c6dcd 2811 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
11f9be09 2812 f['source_preference'] = -10
3619f78d 2813 # TODO: this method is not reliable
2814 f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
11f9be09 2815
2a9c6dcd 2816 # Source is given priority since formats that throttle are given lower source_preference
2817 # When throttling issue is fully fixed, remove this
2818 self._sort_formats(formats, ('quality', 'height', 'fps', 'source'))
bf1317d2 2819
11f9be09 2820 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 2821 if not keywords and webpage:
2822 keywords = [
2823 unescapeHTML(m.group('content'))
2824 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2825 for keyword in keywords:
2826 if keyword.startswith('yt:stretch='):
201c1459 2827 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2828 if mobj:
2829 # NB: float is intentional for forcing float division
2830 w, h = (float(v) for v in mobj.groups())
2831 if w > 0 and h > 0:
2832 ratio = w / h
2833 for f in formats:
2834 if f.get('vcodec') != 'none':
2835 f['stretched_ratio'] = ratio
2836 break
6449cd80 2837
545cc85d 2838 thumbnails = []
11f9be09 2839 thumbnail_dicts = traverse_obj(
2840 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2841 expected_type=dict, default=[])
2842 for thumbnail in thumbnail_dicts:
2843 thumbnail_url = thumbnail.get('url')
2844 if not thumbnail_url:
2845 continue
2846 # Sometimes youtube gives a wrong thumbnail URL. See:
2847 # https://github.com/yt-dlp/yt-dlp/issues/233
2848 # https://github.com/ytdl-org/youtube-dl/issues/28023
2849 if 'maxresdefault' in thumbnail_url:
2850 thumbnail_url = thumbnail_url.split('?')[0]
2851 thumbnails.append({
2852 'url': thumbnail_url,
2853 'height': int_or_none(thumbnail.get('height')),
2854 'width': int_or_none(thumbnail.get('width')),
2855 })
ff2751ac 2856 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2857 if thumbnail_url:
2858 thumbnails.append({
2859 'url': thumbnail_url,
ff2751ac 2860 })
0ba692ac 2861 # The best resolution thumbnails sometimes does not appear in the webpage
2862 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 2863 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2864 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
245524e6 2865 # TODO: Test them also? - For some videos, even these don't exist
cca80fe6 2866 guaranteed_thumbnail_names = [
2867 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2868 'mqdefault', 'mq1', 'mq2', 'mq3',
2869 'default', '1', '2', '3'
2870 ]
2871 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2872 n_thumbnail_names = len(thumbnail_names)
2873
0ba692ac 2874 thumbnails.extend({
2875 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2876 video_id=video_id, name=name, ext=ext,
2877 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 2878 '_test_url': name in hq_thumbnail_names,
2879 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 2880 for thumb in thumbnails:
cca80fe6 2881 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 2882 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 2883 self._remove_duplicate_formats(thumbnails)
545cc85d 2884
7ea65411 2885 category = get_first(microformats, 'category') or search_meta('genre')
2886 channel_id = str_or_none(
2887 get_first(video_details, 'channelId')
2888 or get_first(microformats, 'externalChannelId')
2889 or search_meta('channelId'))
2890 duration = int_or_none(
2891 get_first(video_details, 'lengthSeconds')
2892 or get_first(microformats, 'lengthSeconds')
2893 or parse_duration(search_meta('duration'))) or None
2894 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2895
2896 live_content = get_first(video_details, 'isLiveContent')
2897 is_upcoming = get_first(video_details, 'isUpcoming')
2898 if is_live is None:
2899 if is_upcoming or live_content is False:
2900 is_live = False
2901 if is_upcoming is None and (live_content or is_live):
2902 is_upcoming = False
2903 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2904 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2905 if not duration and live_endtime and live_starttime:
2906 duration = live_endtime - live_starttime
2907
545cc85d 2908 info = {
2909 'id': video_id,
2910 'title': self._live_title(video_title) if is_live else video_title,
2911 'formats': formats,
2912 'thumbnails': thumbnails,
2913 'description': video_description,
2914 'upload_date': unified_strdate(
11f9be09 2915 get_first(microformats, 'uploadDate')
545cc85d 2916 or search_meta('uploadDate')),
11f9be09 2917 'uploader': get_first(video_details, 'author'),
545cc85d 2918 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2919 'uploader_url': owner_profile_url,
2920 'channel_id': channel_id,
11f9be09 2921 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
545cc85d 2922 'duration': duration,
2923 'view_count': int_or_none(
11f9be09 2924 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 2925 or search_meta('interactionCount')),
11f9be09 2926 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 2927 'age_limit': 18 if (
11f9be09 2928 get_first(microformats, 'isFamilySafe') is False
545cc85d 2929 or search_meta('isFamilyFriendly') == 'false'
2930 or search_meta('og:restrictions:age') == '18+') else 0,
2931 'webpage_url': webpage_url,
2932 'categories': [category] if category else None,
2933 'tags': keywords,
11f9be09 2934 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 2935 'is_live': is_live,
2936 'was_live': (False if is_live or is_upcoming or live_content is False
2937 else None if is_live is None or is_upcoming is None
2938 else live_content),
2939 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2940 'release_timestamp': live_starttime,
545cc85d 2941 }
b477fc13 2942
3944e7af 2943 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2944 # Converted into dicts to remove duplicates
2945 captions = {
2946 sub.get('baseUrl'): sub
2947 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2948 translation_languages = {
2949 lang.get('languageCode'): lang.get('languageName')
2950 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
545cc85d 2951 subtitles = {}
2952 if pctr:
774d79cc 2953 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2954 lang_subs = container.setdefault(lang_code, [])
545cc85d 2955 for fmt in self._SUBTITLE_FORMATS:
2956 query.update({
2957 'fmt': fmt,
2958 })
2959 lang_subs.append({
2960 'ext': fmt,
2961 'url': update_url_query(base_url, query),
774d79cc 2962 'name': sub_name,
545cc85d 2963 })
7e72694b 2964
3944e7af 2965 for base_url, caption_track in captions.items():
545cc85d 2966 if not base_url:
2967 continue
2968 if caption_track.get('kind') != 'asr':
120916da 2969 lang_code = (
2970 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2971 or caption_track.get('languageCode'))
545cc85d 2972 if not lang_code:
2973 continue
2974 process_language(
774d79cc 2975 subtitles, base_url, lang_code,
3944e7af 2976 traverse_obj(caption_track, ('name', 'simpleText')),
774d79cc 2977 {})
545cc85d 2978 continue
2979 automatic_captions = {}
3944e7af 2980 for trans_code, trans_name in translation_languages.items():
2981 if not trans_code:
545cc85d 2982 continue
2983 process_language(
3944e7af 2984 automatic_captions, base_url, trans_code,
2985 self._get_text(trans_name, max_runs=1),
2986 {'tlang': trans_code})
545cc85d 2987 info['automatic_captions'] = automatic_captions
2988 info['subtitles'] = subtitles
7e72694b 2989
545cc85d 2990 parsed_url = compat_urllib_parse_urlparse(url)
2991 for component in [parsed_url.fragment, parsed_url.query]:
2992 query = compat_parse_qs(component)
2993 for k, v in query.items():
2994 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2995 d_k += '_time'
2996 if d_k not in info and k in s_ks:
2997 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2998
2999 # Youtube Music Auto-generated description
822b9d9c 3000 if video_description:
38d70284 3001 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 3002 if mobj:
822b9d9c
RA
3003 release_year = mobj.group('release_year')
3004 release_date = mobj.group('release_date')
3005 if release_date:
3006 release_date = release_date.replace('-', '')
3007 if not release_year:
545cc85d 3008 release_year = release_date[:4]
3009 info.update({
3010 'album': mobj.group('album'.strip()),
3011 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3012 'track': mobj.group('track').strip(),
3013 'release_date': release_date,
cc2db878 3014 'release_year': int_or_none(release_year),
545cc85d 3015 })
7e72694b 3016
545cc85d 3017 initial_data = None
3018 if webpage:
3019 initial_data = self._extract_yt_initial_variable(
3020 webpage, self._YT_INITIAL_DATA_RE, video_id,
3021 'yt initial data')
3022 if not initial_data:
11f9be09 3023 headers = self.generate_api_headers(
3024 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
3025 session_index=self._extract_session_index(master_ytcfg))
3026
109dd3b2 3027 initial_data = self._extract_response(
3028 item_id=video_id, ep='next', fatal=False,
11f9be09 3029 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
109dd3b2 3030 note='Downloading initial data API JSON')
545cc85d 3031
c60ee3a2 3032 try:
3033 # This will error if there is no livechat
3034 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3035 info['subtitles']['live_chat'] = [{
3036 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3037 'video_id': video_id,
3038 'ext': 'json',
f6745c49 3039 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 3040 }]
3041 except (KeyError, IndexError, TypeError):
3042 pass
545cc85d 3043
3044 if initial_data:
7c365c21 3045 info['chapters'] = (
3046 self._extract_chapters_from_json(initial_data, duration)
3047 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3048 or None)
545cc85d 3049
3050 contents = try_get(
3051 initial_data,
3052 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3053 list) or []
3054 for content in contents:
3055 vpir = content.get('videoPrimaryInfoRenderer')
3056 if vpir:
3057 stl = vpir.get('superTitleLink')
3058 if stl:
fe93e2c4 3059 stl = self._get_text(stl)
545cc85d 3060 if try_get(
3061 vpir,
3062 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3063 info['location'] = stl
3064 else:
3065 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3066 if mobj:
3067 info.update({
3068 'series': mobj.group(1),
3069 'season_number': int(mobj.group(2)),
3070 'episode_number': int(mobj.group(3)),
3071 })
3072 for tlb in (try_get(
3073 vpir,
3074 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3075 list) or []):
3076 tbr = tlb.get('toggleButtonRenderer') or {}
3077 for getter, regex in [(
3078 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3079 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3080 lambda x: x['accessibility'],
3081 lambda x: x['accessibilityData']['accessibilityData'],
3082 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3083 label = (try_get(tbr, getter, dict) or {}).get('label')
3084 if label:
3085 mobj = re.match(regex, label)
3086 if mobj:
3087 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3088 break
3089 sbr_tooltip = try_get(
3090 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3091 if sbr_tooltip:
3092 like_count, dislike_count = sbr_tooltip.split(' / ')
3093 info.update({
3094 'like_count': str_to_int(like_count),
3095 'dislike_count': str_to_int(dislike_count),
3096 })
3097 vsir = content.get('videoSecondaryInfoRenderer')
3098 if vsir:
052e1350 3099 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
545cc85d 3100 rows = try_get(
3101 vsir,
3102 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3103 list) or []
3104 multiple_songs = False
3105 for row in rows:
3106 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3107 multiple_songs = True
3108 break
3109 for row in rows:
3110 mrr = row.get('metadataRowRenderer') or {}
3111 mrr_title = mrr.get('title')
3112 if not mrr_title:
3113 continue
052e1350 3114 mrr_title = self._get_text(mrr, 'title')
3115 mrr_contents_text = self._get_text(mrr, ('contents', 0))
545cc85d 3116 if mrr_title == 'License':
3117 info['license'] = mrr_contents_text
3118 elif not multiple_songs:
3119 if mrr_title == 'Album':
3120 info['album'] = mrr_contents_text
3121 elif mrr_title == 'Artist':
3122 info['artist'] = mrr_contents_text
3123 elif mrr_title == 'Song':
3124 info['track'] = mrr_contents_text
3125
3126 fallbacks = {
3127 'channel': 'uploader',
3128 'channel_id': 'uploader_id',
3129 'channel_url': 'uploader_url',
3130 }
3131 for to, frm in fallbacks.items():
3132 if not info.get(to):
3133 info[to] = info.get(frm)
3134
3135 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3136 v = info.get(s_k)
3137 if v:
3138 info[d_k] = v
b84071c0 3139
11f9be09 3140 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3141 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3142 is_membersonly = None
b28f8d24 3143 is_premium = None
c224251a
M
3144 if initial_data and is_private is not None:
3145 is_membersonly = False
b28f8d24 3146 is_premium = False
47193e02 3147 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3148 badge_labels = set()
3149 for content in contents:
3150 if not isinstance(content, dict):
3151 continue
3152 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3153 for badge_label in badge_labels:
3154 if badge_label.lower() == 'members only':
3155 is_membersonly = True
3156 elif badge_label.lower() == 'premium':
3157 is_premium = True
3158 elif badge_label.lower() == 'unlisted':
3159 is_unlisted = True
c224251a 3160
c224251a
M
3161 info['availability'] = self._availability(
3162 is_private=is_private,
b28f8d24 3163 needs_premium=is_premium,
c224251a
M
3164 needs_subscription=is_membersonly,
3165 needs_auth=info['age_limit'] >= 18,
3166 is_unlisted=None if is_private is None else is_unlisted)
3167
06167fbb 3168 # get xsrf for annotations or comments
a06916d9 3169 get_annotations = self.get_param('writeannotations', False)
3170 get_comments = self.get_param('getcomments', False)
06167fbb 3171 if get_annotations or get_comments:
29f7c58a 3172 xsrf_token = None
11f9be09 3173 if master_ytcfg:
3174 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
29f7c58a 3175 if not xsrf_token:
3176 xsrf_token = self._search_regex(
3177 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 3178 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 3179
3180 # annotations
06167fbb 3181 if get_annotations:
11f9be09 3182 invideo_url = get_first(
3183 player_responses,
3184 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3185 expected_type=str)
64b6a4e9 3186 if xsrf_token and invideo_url:
29f7c58a 3187 xsrf_field_name = None
11f9be09 3188 if master_ytcfg:
3189 xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
29f7c58a 3190 if not xsrf_field_name:
3191 xsrf_field_name = self._search_regex(
3192 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 3193 webpage, 'xsrf field name',
29f7c58a 3194 group='xsrf_field_name', default='session_token')
8a784c74 3195 info['annotations'] = self._download_webpage(
64b6a4e9
RA
3196 self._proto_relative_url(invideo_url),
3197 video_id, note='Downloading annotations',
3198 errnote='Unable to download video annotations', fatal=False,
3199 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 3200
277d6ff5 3201 if get_comments:
11f9be09 3202 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3203
11f9be09 3204 self.mark_watched(video_id, player_responses)
d77ab8e2 3205
545cc85d 3206 return info
c5e8d7af 3207
5f6a1245 3208
8bdd16b4 3209class YoutubeTabIE(YoutubeBaseInfoExtractor):
3210 IE_DESC = 'YouTube.com tab'
70d5c17b 3211 _VALID_URL = r'''(?x)
3212 https?://
3213 (?:\w+\.)?
3214 (?:
3215 youtube(?:kids)?\.com|
3216 invidio\.us
3217 )/
3218 (?:
fe03a6cd 3219 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3220 (?P<not_channel>
9ba5705a 3221 feed/|hashtag/|
70d5c17b 3222 (?:playlist|watch)\?.*?\blist=
3223 )|
29f7c58a 3224 (?!(?:%s)\b) # Direct URLs
70d5c17b 3225 )
3226 (?P<id>[^/?\#&]+)
3227 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3228 IE_NAME = 'youtube:tab'
3229
81127aa5 3230 _TESTS = [{
da692b79 3231 'note': 'playlists, multipage',
8bdd16b4 3232 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3233 'playlist_mincount': 94,
3234 'info_dict': {
3235 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3236 'title': 'Игорь Клейнер - Playlists',
3237 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3238 'uploader': 'Игорь Клейнер',
3239 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3240 },
3241 }, {
da692b79 3242 'note': 'playlists, multipage, different order',
8bdd16b4 3243 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3244 'playlist_mincount': 94,
3245 'info_dict': {
3246 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3247 'title': 'Игорь Клейнер - Playlists',
3248 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3249 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3250 'uploader': 'Игорь Клейнер',
8bdd16b4 3251 },
201c1459 3252 }, {
da692b79 3253 'note': 'playlists, series',
201c1459 3254 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3255 'playlist_mincount': 5,
3256 'info_dict': {
3257 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3258 'title': '3Blue1Brown - Playlists',
3259 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3260 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3261 'uploader': '3Blue1Brown',
201c1459 3262 },
8bdd16b4 3263 }, {
da692b79 3264 'note': 'playlists, singlepage',
8bdd16b4 3265 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3266 'playlist_mincount': 4,
3267 'info_dict': {
3268 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3269 'title': 'ThirstForScience - Playlists',
3270 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3271 'uploader': 'ThirstForScience',
3272 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3273 }
3274 }, {
3275 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3276 'only_matching': True,
3277 }, {
da692b79 3278 'note': 'basic, single video playlist',
0e30a7b9 3279 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3280 'info_dict': {
0e30a7b9 3281 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3282 'uploader': 'Sergey M.',
3283 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3284 'title': 'youtube-dl public playlist',
81127aa5 3285 },
0e30a7b9 3286 'playlist_count': 1,
9291475f 3287 }, {
da692b79 3288 'note': 'empty playlist',
0e30a7b9 3289 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3290 'info_dict': {
0e30a7b9 3291 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3292 'uploader': 'Sergey M.',
3293 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3294 'title': 'youtube-dl empty playlist',
9291475f
PH
3295 },
3296 'playlist_count': 0,
3297 }, {
da692b79 3298 'note': 'Home tab',
8bdd16b4 3299 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3300 'info_dict': {
8bdd16b4 3301 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3302 'title': 'lex will - Home',
3303 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3304 'uploader': 'lex will',
3305 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3306 },
8bdd16b4 3307 'playlist_mincount': 2,
9291475f 3308 }, {
da692b79 3309 'note': 'Videos tab',
8bdd16b4 3310 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3311 'info_dict': {
8bdd16b4 3312 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3313 'title': 'lex will - Videos',
3314 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3315 'uploader': 'lex will',
3316 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3317 },
8bdd16b4 3318 'playlist_mincount': 975,
9291475f 3319 }, {
da692b79 3320 'note': 'Videos tab, sorted by popular',
8bdd16b4 3321 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3322 'info_dict': {
8bdd16b4 3323 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3324 'title': 'lex will - Videos',
3325 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3326 'uploader': 'lex will',
3327 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3328 },
8bdd16b4 3329 'playlist_mincount': 199,
9291475f 3330 }, {
da692b79 3331 'note': 'Playlists tab',
8bdd16b4 3332 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3333 'info_dict': {
8bdd16b4 3334 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3335 'title': 'lex will - Playlists',
3336 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3337 'uploader': 'lex will',
3338 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3339 },
8bdd16b4 3340 'playlist_mincount': 17,
ac7553d0 3341 }, {
da692b79 3342 'note': 'Community tab',
8bdd16b4 3343 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3344 'info_dict': {
8bdd16b4 3345 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3346 'title': 'lex will - Community',
3347 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3348 'uploader': 'lex will',
3349 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3350 },
3351 'playlist_mincount': 18,
87dadd45 3352 }, {
da692b79 3353 'note': 'Channels tab',
8bdd16b4 3354 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3355 'info_dict': {
8bdd16b4 3356 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3357 'title': 'lex will - Channels',
3358 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3359 'uploader': 'lex will',
3360 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3361 },
deaec5af 3362 'playlist_mincount': 12,
cd684175 3363 }, {
3364 'note': 'Search tab',
3365 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3366 'playlist_mincount': 40,
3367 'info_dict': {
3368 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3369 'title': '3Blue1Brown - Search - linear algebra',
3370 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3371 'uploader': '3Blue1Brown',
3372 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3373 },
6b08cdf6 3374 }, {
a0566bbf 3375 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3376 'only_matching': True,
3377 }, {
a0566bbf 3378 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3379 'only_matching': True,
3380 }, {
a0566bbf 3381 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3382 'only_matching': True,
3383 }, {
3384 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3385 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3386 'info_dict': {
3387 'title': '29C3: Not my department',
3388 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3389 'uploader': 'Christiaan008',
3390 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3391 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3392 },
3393 'playlist_count': 96,
3394 }, {
3395 'note': 'Large playlist',
3396 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3397 'info_dict': {
8bdd16b4 3398 'title': 'Uploads from Cauchemar',
3399 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3400 'uploader': 'Cauchemar',
3401 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3402 },
8bdd16b4 3403 'playlist_mincount': 1123,
3404 }, {
da692b79 3405 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3406 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3407 'only_matching': True,
4b7df0d3
JMF
3408 }, {
3409 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3410 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3411 'info_dict': {
acf757f4
PH
3412 'title': 'Uploads from Interstellar Movie',
3413 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3414 'uploader': 'Interstellar Movie',
8bdd16b4 3415 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3416 },
481cc733 3417 'playlist_mincount': 21,
358de58c 3418 }, {
3419 'note': 'Playlist with "show unavailable videos" button',
3420 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3421 'info_dict': {
3422 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3423 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3424 'uploader': 'Phim Siêu Nhân Nhật Bản',
3425 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3426 },
da692b79 3427 'playlist_mincount': 200,
5d342002 3428 }, {
da692b79 3429 'note': 'Playlist with unavailable videos in page 7',
5d342002 3430 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3431 'info_dict': {
3432 'title': 'Uploads from BlankTV',
3433 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3434 'uploader': 'BlankTV',
3435 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3436 },
da692b79 3437 'playlist_mincount': 1000,
8bdd16b4 3438 }, {
da692b79 3439 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3440 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3441 'info_dict': {
3442 'title': 'Data Analysis with Dr Mike Pound',
3443 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3444 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3445 'uploader': 'Computerphile',
deaec5af 3446 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3447 },
3448 'playlist_mincount': 11,
3449 }, {
a0566bbf 3450 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3451 'only_matching': True,
dacb3a86 3452 }, {
da692b79 3453 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3454 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3455 'info_dict': {
3456 'id': 'FqZTN594JQw',
3457 'ext': 'webm',
3458 'title': "Smiley's People 01 detective, Adventure Series, Action",
3459 'uploader': 'STREEM',
3460 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3461 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3462 'upload_date': '20150526',
3463 'license': 'Standard YouTube License',
3464 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3465 'categories': ['People & Blogs'],
3466 'tags': list,
dbdaaa23 3467 'view_count': int,
dacb3a86
S
3468 'like_count': int,
3469 'dislike_count': int,
3470 },
3471 'params': {
3472 'skip_download': True,
3473 },
13a75688 3474 'skip': 'This video is not available.',
dacb3a86 3475 'add_ie': [YoutubeIE.ie_key()],
481cc733 3476 }, {
8bdd16b4 3477 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3478 'only_matching': True,
66b48727 3479 }, {
8bdd16b4 3480 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3481 'only_matching': True,
a0566bbf 3482 }, {
3483 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3484 'info_dict': {
57015a4a 3485 'id': '3yImotZU3tw', # This will keep changing
a0566bbf 3486 'ext': 'mp4',
deaec5af 3487 'title': compat_str,
a0566bbf 3488 'uploader': 'Sky News',
3489 'uploader_id': 'skynews',
3490 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3491 'upload_date': r're:\d{8}',
3492 'description': compat_str,
a0566bbf 3493 'categories': ['News & Politics'],
3494 'tags': list,
3495 'like_count': int,
3496 'dislike_count': int,
3497 },
3498 'params': {
3499 'skip_download': True,
3500 },
da692b79 3501 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3502 }, {
3503 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3504 'info_dict': {
3505 'id': 'a48o2S1cPoo',
3506 'ext': 'mp4',
3507 'title': 'The Young Turks - Live Main Show',
3508 'uploader': 'The Young Turks',
3509 'uploader_id': 'TheYoungTurks',
3510 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3511 'upload_date': '20150715',
3512 'license': 'Standard YouTube License',
3513 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3514 'categories': ['News & Politics'],
3515 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3516 'like_count': int,
3517 'dislike_count': int,
3518 },
3519 'params': {
3520 'skip_download': True,
3521 },
3522 'only_matching': True,
3523 }, {
3524 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3525 'only_matching': True,
3526 }, {
3527 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3528 'only_matching': True,
09f1580e 3529 }, {
3530 'note': 'A channel that is not live. Should raise error',
3531 'url': 'https://www.youtube.com/user/numberphile/live',
3532 'only_matching': True,
3d3dddc9 3533 }, {
3534 'url': 'https://www.youtube.com/feed/trending',
3535 'only_matching': True,
3536 }, {
3d3dddc9 3537 'url': 'https://www.youtube.com/feed/library',
3538 'only_matching': True,
3539 }, {
3d3dddc9 3540 'url': 'https://www.youtube.com/feed/history',
3541 'only_matching': True,
3542 }, {
3d3dddc9 3543 'url': 'https://www.youtube.com/feed/subscriptions',
3544 'only_matching': True,
3545 }, {
3d3dddc9 3546 'url': 'https://www.youtube.com/feed/watch_later',
3547 'only_matching': True,
3548 }, {
da692b79 3549 'note': 'Recommended - redirects to home page',
3d3dddc9 3550 'url': 'https://www.youtube.com/feed/recommended',
3551 'only_matching': True,
29f7c58a 3552 }, {
da692b79 3553 'note': 'inline playlist with not always working continuations',
29f7c58a 3554 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3555 'only_matching': True,
3556 }, {
3557 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3558 'only_matching': True,
3559 }, {
3560 'url': 'https://www.youtube.com/course',
3561 'only_matching': True,
3562 }, {
3563 'url': 'https://www.youtube.com/zsecurity',
3564 'only_matching': True,
3565 }, {
3566 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3567 'only_matching': True,
3568 }, {
3569 'url': 'https://www.youtube.com/TheYoungTurks/live',
3570 'only_matching': True,
39ed931e 3571 }, {
3572 'url': 'https://www.youtube.com/hashtag/cctv9',
3573 'info_dict': {
3574 'id': 'cctv9',
3575 'title': '#cctv9',
3576 },
3577 'playlist_mincount': 350,
201c1459 3578 }, {
3579 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3580 'only_matching': True,
9297939e 3581 }, {
da692b79 3582 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3583 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3584 'only_matching': True
fe03a6cd 3585 }, {
3586 'note': '/browse/ should redirect to /channel/',
3587 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3588 'only_matching': True
3589 }, {
3590 'note': 'VLPL, should redirect to playlist?list=PL...',
3591 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3592 'info_dict': {
3593 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3594 'uploader': 'NoCopyrightSounds',
3595 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3596 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3597 'title': 'NCS Releases',
3598 },
3599 'playlist_mincount': 166,
18db7548 3600 }, {
3601 'note': 'Topic, should redirect to playlist?list=UU...',
3602 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3603 'info_dict': {
3604 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3605 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3606 'title': 'Uploads from Royalty Free Music - Topic',
3607 'uploader': 'Royalty Free Music - Topic',
3608 },
3609 'expected_warnings': [
3610 'A channel/user page was given',
3611 'The URL does not have a videos tab',
3612 ],
3613 'playlist_mincount': 101,
3614 }, {
3615 'note': 'Topic without a UU playlist',
3616 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3617 'info_dict': {
3618 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3619 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3620 },
3621 'expected_warnings': [
3622 'A channel/user page was given',
3623 'The URL does not have a videos tab',
3624 'Falling back to channel URL',
3625 ],
3626 'playlist_mincount': 9,
abcdd12b 3627 }, {
3628 'note': 'Youtube music Album',
3629 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3630 'info_dict': {
3631 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3632 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3633 },
3634 'playlist_count': 50,
47193e02 3635 }, {
3636 'note': 'unlisted single video playlist',
3637 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3638 'info_dict': {
3639 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3640 'uploader': 'colethedj',
3641 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3642 'title': 'yt-dlp unlisted playlist test',
3643 'availability': 'unlisted'
3644 },
3645 'playlist_count': 1,
29f7c58a 3646 }]
3647
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    Any URL that YoutubeIE reports as suitable is declined here; every
    other URL is decided by the parent class's ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3652
3653 def _extract_channel_id(self, webpage):
3654 channel_id = self._html_search_meta(
3655 'channelId', webpage, 'channel id', default=None)
3656 if channel_id:
3657 return channel_id
3658 channel_url = self._html_search_meta(
3659 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3660 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3661 'twitter:app:url:googleplay'), webpage, 'channel url')
3662 return self._search_regex(
3663 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3664 channel_url, 'channel id')
15f6397c 3665
8bdd16b4 3666 @staticmethod
cd7c66cf 3667 def _extract_basic_item_renderer(item):
3668 # Modified from _extract_grid_item_renderer
201c1459 3669 known_basic_renderers = (
3670 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3671 )
3672 for key, renderer in item.items():
201c1459 3673 if not isinstance(renderer, dict):
cd7c66cf 3674 continue
201c1459 3675 elif key in known_basic_renderers:
3676 return renderer
3677 elif key.startswith('grid') and key.endswith('Renderer'):
3678 return renderer
8bdd16b4 3679
def _grid_entries(self, grid_renderer):
    """Yield one entry per usable item in ``grid_renderer['items']``.

    Each item's renderer is resolved, in order of precedence, as a
    playlist, a video, a channel, or a generic navigation endpoint URL.
    Items that are not dicts, carry no recognised renderer, or match none
    of these cases produce nothing.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            yield self._extract_video(renderer)
        elif channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # Generic endpoint URL support: the URL goes to the first
            # extractor (in the order listed) that claims it.
            endpoint_path = try_get(
                renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            for candidate_ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if not candidate_ie.suitable(ep_url):
                    continue
                yield self.url_result(
                    ep_url, ie=candidate_ie.ie_key(),
                    video_id=candidate_ie._match_id(ep_url), video_title=title)
                break
8bdd16b4 3719
3d3dddc9 3720 def _shelf_entries_from_content(self, shelf_renderer):
3721 content = shelf_renderer.get('content')
3722 if not isinstance(content, dict):
8bdd16b4 3723 return
cd7c66cf 3724 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3725 if renderer:
3726 # TODO: add support for nested playlists so each shelf is processed
3727 # as separate playlist
3728 # TODO: this includes only first N items
3729 for entry in self._grid_entries(renderer):
3730 yield entry
3731 renderer = content.get('horizontalListRenderer')
3732 if renderer:
3733 # TODO
3734 pass
8bdd16b4 3735
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf: its own URL, then its inline content.

    When the shelf exposes an endpoint URL, a URL result titled with the
    shelf title is yielded first. With *skip_channels* set, a shelf whose
    URL contains ``/channels?`` yields nothing at all. Entries found in the
    shelf's content are yielded afterwards in every other case.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are skipped on request. Note that checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work, hence the URL substring test.
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # The shelf may carry no URL at all, so its inline content is walked too.
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 3752
8bdd16b4 3753 def _playlist_entries(self, video_list_renderer):
3754 for content in video_list_renderer['contents']:
3755 if not isinstance(content, dict):
3756 continue
3757 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3758 if not isinstance(renderer, dict):
3759 continue
3760 video_id = renderer.get('videoId')
3761 if not video_id:
3762 continue
3763 yield self._extract_video(renderer)
07aeced6 3764
def _rich_entries(self, rich_grid_renderer):
    """Yield the video entry held under ``content.videoRenderer``, if any.

    Nothing is yielded when that renderer is missing, is not a dict, or
    has no ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3772
8bdd16b4 3773 def _video_entry(self, video_renderer):
3774 video_id = video_renderer.get('videoId')
3775 if video_id:
3776 return self._extract_video(video_renderer)
dacb3a86 3777
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a single community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every link in the post text that YoutubeIE accepts, except a link
    to the already-attached video. Nothing is yielded when the thread has
    no ``post.backstagePostRenderer`` dict.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # Video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # Playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # Inline video links in the post text
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # A link back to the attached video would only repeat it.
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 3813
8bdd16b4 3814 def _post_thread_continuation_entries(self, post_thread_continuation):
3815 contents = post_thread_continuation.get('contents')
3816 if not isinstance(contents, list):
3817 return
3818 for content in contents:
3819 renderer = content.get('backstagePostThreadRenderer')
3820 if not isinstance(renderer, dict):
3821 continue
3822 for entry in self._post_thread_entries(renderer):
3823 yield entry
07aeced6 3824
39ed931e 3825 r''' # unused
3826 def _rich_grid_entries(self, contents):
3827 for content in contents:
3828 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3829 if video_renderer:
3830 entry = self._video_entry(video_renderer)
3831 if entry:
3832 yield entry
3833 '''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """
    Yield the entries of a tab, following continuations page by page.

    @param tab              selected tab renderer; its 'content' key is read
    @param item_id          id used only to label the continuation requests
    @param identity_token   passed through to generate_api_headers
    @param account_syncid   passed through to generate_api_headers
    @param ytcfg            passed to generate_api_headers and _extract_response

    The first page is taken from the tab itself; every further page is
    requested with _extract_response using the last continuation found.
    Iteration stops when no continuation is available, when the response is
    empty, or when the response contains no renderer known to this method.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Walks parent_renderer['contents'] and, as a side effect, stores the
        # continuation it finds in continuation_list[0] of the enclosing scope.
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # Not an item section: only rich items are handled here
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Maps a renderer key to the callable that produces its entries
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    # Only the first known renderer of each item is processed
                    break

            # Fall back to a continuation attached to the item section itself
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # Last resort: a continuation attached to the parent renderer
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list so that extract_entries can write to it
    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    visitor_data = None

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response does not carry one
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response shape: entries under 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Rebinding gives extract_entries a fresh slot to write into
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: items appended through
        # 'appendContinuationItemsAction'. Each value is
        # (entry extractor, key under which the items are wrapped).
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item decides how the whole list is handled
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the raw item list so it looks like a regular renderer
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognised in this response: stop paging
        break
9558dcec 3949
@staticmethod
def _extract_selected_tab(tabs):
    """
    Return the renderer of the tab whose 'selected' value is exactly True.

    Each element of tabs is looked up under 'tabRenderer' and
    'expandableTabRenderer' via dict_get.
    Raises ExtractorError when no tab is marked as selected.
    """
    for candidate in tabs:
        tab_renderer = dict_get(candidate, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is not True:
            continue
        return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3958
@classmethod
def _extract_uploader(cls, data):
    """
    Read uploader name, id and URL from the secondary sidebar info renderer.

    Returns a dict with the keys 'uploader', 'uploader_id' and 'uploader_url';
    keys whose value is None are omitted. An empty dict is returned when the
    video owner cannot be found.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}
    found = {
        'uploader': owner.get('text'),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {key: value for key, value in found.items() if value is not None}
8bdd16b4 3973
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """
    Build the playlist result for a page that is organised in tabs.

    Metadata is read from 'channelMetadataRenderer' if present, otherwise
    from 'playlistMetadataRenderer'. The entries come from self._entries
    applied to the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []
    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        # Stays None for playlists, in which case item_id is used below
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(metadata_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only thumbnails that are dicts with a usable URL
    thumbnails = []
    for thumb in raw_thumbnails:
        thumbnail_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag_title = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag_title or playlist_id
    # Append the tab name (and expanded text, if any) to the title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        # No channel metadata: take the uploader from the sidebar instead
        metadata.update(self._extract_uploader(data))
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    ytcfg = self.extract_ytcfg(item_id, webpage)
    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 4048
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """
    Yield the videos of a mix playlist, requesting further pages as needed.

    @param playlist     playlist renderer of the first page
    @param playlist_id  id sent as 'playlistId' and used to label requests
    @param data         initial data, used to extract the account sync id
    @param webpage      webpage, used to extract ytcfg and identity token

    Iteration ends when a page has no videos, when a page brings nothing new
    after the last yielded video, when the first video shows up again, or
    when the response does not contain a playlist.
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # try_get returns None when the last item is not a panel video
        # renderer; default to {} so the .get() calls below cannot fail
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Nothing to continue with; do not hand None to _playlist_entries
            return
cd7c66cf 4084
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """
    Handle a playlist found on a watch page.

    If the playlist points to a URL different from the current one, the
    extraction is delegated to YoutubeTabIE through a url_result; otherwise
    the playlist is extracted as a mix with _extract_mix_playlist.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4102
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its badges.
    Note: privacy is left undetermined (None) unless a badge or the
    selected privacy dropdown entry states it explicitly
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        selected = try_get(
            dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if selected:
            selected_label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
            if selected_label:
                labels.add(selected_label.lower())
                break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'public':
            is_unlisted = is_private = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4134
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy value stored under the key info_renderer in the
    items of data['sidebar']['playlistSidebarRenderer'], or None.
    Values that are not of expected_type are ignored.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((found for found in candidates if found), None)
4143
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again so that unavailable videos are included.

    Returns None when the primary sidebar info renderer is missing;
    otherwise returns the result of the non-fatal _extract_response call.
    The browse id and params are taken from the 'show unavailable videos'
    menu item if it exists, else fixed defaults are used.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        item_text = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if item_text and item_text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    ytcfg = self.extract_ytcfg(item_id, webpage)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    identity_token = self._extract_identity_token(webpage, item_id=item_id)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=account_syncid,
        identity_token=identity_token, visitor_data=visitor_data)
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4182
def _extract_webpage(self, url, item_id):
    """
    Download the webpage and its initial data, retrying on incomplete data.

    The page is downloaded again while the initial data has neither
    'contents' nor 'currentVideoEndpoint', up to the number of retries given
    by the 'extractor_retries' param (default 3).

    Returns a (webpage, data) tuple.
    Raises ExtractorError when the data is still incomplete after the last
    retry; _extract_and_report_alerts is called first on every incomplete
    response.
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self.extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
4204
@staticmethod
def _smuggle_data(entries, data):
    """
    Lazily yield every entry, smuggling data into its 'url' when data is
    truthy. The entries are modified in place.
    """
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
4211
def _real_extract(self, url):
    """
    Entry point: unsmuggle the URL, run the actual extraction and pass the
    smuggled data on to every resulting entry.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4220
# Splits a URL into three named parts:
#   pre  - the part matched by _VALID_URL
#   tab  - an optional '/word' segment, only tried when the 'channel_type'
#          group of _VALID_URL took part in the match
#   post - whatever follows
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4222
def __real_extract(self, url, smuggled_data):
    """
    Extract a tab page, playlist or single video from the given URL.

    @param url            URL with smuggled data already removed
    @param smuggled_data  dict; only its 'is_music_url' key is read here

    The URL is first normalised (host, music channel ids, default tab),
    then the webpage is downloaded and the result is built from whichever
    of tabs, watch-page playlist or current video is found in the data.
    Raises ExtractorError when the page cannot be recognised or when the
    /live tab did not redirect to a video.
    """
    item_id = self._match_id(url)
    # Always query www.youtube.com, whatever host the URL was given with
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Group dict of _url_re with unmatched groups replaced by ''
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    # is_channel is checked again because it may have been reset above
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and match it again
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            # The requested /videos tab is not the one that got selected
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Switch to the playlist only once it loaded without error
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)
    # data may have been replaced above, so the tabs are looked up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Neither tabs nor playlist: fall back to the single video, if any
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4337
c5e8d7af 4338
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids and playlist URLs and hands them over to
    # YoutubeTabIE as a normalised https://www.youtube.com/playlist URL.
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # URLs handled by YoutubeTabIE, and URLs carrying a video id ('v'),
        # are never claimed by this extractor.
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one, else pass just the list id
        playlist_url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4423
4424
class YoutubeYtBeIE(InfoExtractor):
    # Rewrites youtu.be links that carry a playlist id into the equivalent
    # www.youtube.com/watch URL and delegates to YoutubeTabIE.
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        match = self._match_valid_url(url)
        video_id, playlist_id = match.group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4463
4464
class YoutubeYtUserIE(InfoExtractor):
    # Maps the "ytuser:<name>" keyword to the user's page on youtube.com
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4478
b05654f0 4479
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Maps the ":ytfav" family of keywords to the playlist with list id 'LL'
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4497
4498
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _SEARCH_KEY = 'ytsearch'
    # Sent as 'params' with the search request when set
    _SEARCH_PARAMS = None
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n videos for the query, requesting page after page"""
        request_data = {'query': query}
        if self._SEARCH_PARAMS:
            request_data['params'] = self._SEARCH_PARAMS
        yielded = 0
        continuation = {}
        for page_num in itertools.count(1):
            # After the first page this adds the continuation to the request
            request_data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request_data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page and continuation pages keep the results in different places
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [section]})

                section_items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for section_item in section_items or []:
                    if not isinstance(section_item, dict):
                        continue
                    video = section_item.get('videoRenderer')
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query, query)
75dff0ee 4566
c9ae7b95 4567
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Differs from YoutubeSearchIE only in its keyword and search params
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
75dff0ee 4573
c9ae7b95 4574
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Use the URL pattern above as it is
        return cls._VALID_URL

    def _real_extract(self, url):
        url_query = compat_urllib_parse_urlparse(url).query
        qs = compat_parse_qs(url_query)
        search_terms = qs.get('search_query') or qs.get('q')
        query = search_terms[0]
        # The 'sp' value of the URL becomes the search params of this instance
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4601
4602
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    A subclass has to provide _FEED_NAME; it is used for both the
    extractor name and the feed URL.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4619
4620
class YoutubeWatchLaterIE(InfoExtractor):
    # Maps the ":ytwatchlater" keyword to the playlist with list id 'WL'
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4633
4634
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``recommended`` feed (``:ytrec`` or the bare YouTube home URL)."""
    _FEED_NAME = 'recommended'
    # Unlike the feeds base class, this feed is flagged as not needing a login
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]
1ed5b5c9 4650
1ed5b5c9 4651
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed (``:ytsubs`` and variants)."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]
1ed5b5c9 4663
1ed5b5c9 4664
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed (``:ythis`` / ``:ythistory``)."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
1ed5b5c9
JMF
4673
4674
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their ``v=`` parameter.

    Matches YouTube watch / attribution_link URLs that end right after a
    single non-video query parameter (or with no parameter at all) and
    always fails with a hint about quoting the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose-mode pattern; the trailing `$` makes it match only when
    # nothing follows the single listed parameter.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        # The example commands name this project's executable (yt-dlp) so
        # that the suggested invocation can be copied verbatim.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4722
4723
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is only 1 to 10 characters long and fail with a clear error."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)