]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[vidio] Fix login error detection (#582)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
2d30521a 37 float_or_none,
11f9be09 38 format_field,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
94278f72 41 mimetype2ext,
9c0d7f49 42 network_exceptions,
11f9be09 43 orderedSet,
6310acf5 44 parse_codecs,
49bd8c66 45 parse_count,
7c80519c 46 parse_duration,
7ea65411 47 parse_iso8601,
dca3ff4a 48 qualities,
3995d37d 49 remove_start,
cf7e015f 50 smuggle_url,
dbdaaa23 51 str_or_none,
c93d53f5 52 str_to_int,
7c365c21 53 traverse_obj,
556dbe7f 54 try_get,
c5e8d7af
PH
55 unescapeHTML,
56 unified_strdate,
cf7e015f 57 unsmuggle_url,
8bdd16b4 58 update_url_query,
21c340b8 59 url_or_none,
6e6bc8da 60 urlencode_postdata,
fe93e2c4 61 urljoin,
7c365c21 62 variadic,
c5e8d7af
PH
63)
64
5f6a1245 65
def parse_qs(url):
    """Parse the query component of *url* with compat_urlparse.parse_qs and return the result."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
68
69
de7f3446 70class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
71 """Provide base functions for Youtube extractors"""
72 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
9303ce3e 73 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
e00eb564
S
74
75 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
3995d37d
S
76 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
77 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
e00eb564 78
3462ffa8 79 _RESERVED_NAMES = (
bea74222 80 r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
46953e7e 81 r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
cd7c66cf 82 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 83
b2e8bc1b
JMF
84 _NETRC_MACHINE = 'youtube'
85 # If True it will raise an error if no login info is provided
86 _LOGIN_REQUIRED = False
87
70d5c17b 88 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 89
b2e8bc1b 90 def _login(self):
83317f69 91 """
92 Attempt to log in to YouTube.
93 True is returned if successful or skipped.
94 False is returned if login failed.
95
96 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
97 """
9d5d4d64 98
99 def warn(message):
100 self.report_warning(message)
101
102 # username+password login is broken
982ee69a
MB
103 if (self._LOGIN_REQUIRED
104 and self.get_param('cookiefile') is None
105 and self.get_param('cookiesfrombrowser') is None):
9d5d4d64 106 self.raise_login_required(
107 'Login details are needed to download this content', method='cookies')
68217024 108 username, password = self._get_login_info()
9d5d4d64 109 if username:
110 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
111 return
9d5d4d64 112
2d6659b9 113 # Everything below this is broken!
114 r'''
b2e8bc1b
JMF
115 # No authentication to be performed
116 if username is None:
a06916d9 117 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 118 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 119 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 120 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 121 return True
b2e8bc1b 122
7cc3570e
PH
123 login_page = self._download_webpage(
124 self._LOGIN_URL, None,
69ea8ca4
PH
125 note='Downloading login page',
126 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
127 if login_page is False:
128 return
b2e8bc1b 129
1212e997 130 login_form = self._hidden_inputs(login_page)
c5e8d7af 131
e00eb564
S
132 def req(url, f_req, note, errnote):
133 data = login_form.copy()
134 data.update({
135 'pstMsg': 1,
136 'checkConnection': 'youtube',
137 'checkedDomains': 'youtube',
138 'hl': 'en',
139 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 140 'f.req': json.dumps(f_req),
e00eb564
S
141 'flowName': 'GlifWebSignIn',
142 'flowEntry': 'ServiceLogin',
baf67a60
S
143 # TODO: reverse actual botguard identifier generation algo
144 'bgRequest': '["identifier",""]',
041bc3ad 145 })
e00eb564
S
146 return self._download_json(
147 url, None, note=note, errnote=errnote,
148 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
149 fatal=False,
150 data=urlencode_postdata(data), headers={
151 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
152 'Google-Accounts-XSRF': 1,
153 })
154
3995d37d
S
155 lookup_req = [
156 username,
157 None, [], None, 'US', None, None, 2, False, True,
158 [
159 None, None,
160 [2, 1, None, 1,
161 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
162 None, [], 4],
163 1, [None, None, []], None, None, None, True
164 ],
165 username,
166 ]
167
e00eb564 168 lookup_results = req(
3995d37d 169 self._LOOKUP_URL, lookup_req,
e00eb564
S
170 'Looking up account info', 'Unable to look up account info')
171
172 if lookup_results is False:
173 return False
041bc3ad 174
3995d37d
S
175 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
176 if not user_hash:
177 warn('Unable to extract user hash')
178 return False
179
180 challenge_req = [
181 user_hash,
182 None, 1, None, [1, None, None, None, [password, None, True]],
183 [
184 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
185 1, [None, None, []], None, None, None, True
186 ]]
83317f69 187
3995d37d
S
188 challenge_results = req(
189 self._CHALLENGE_URL, challenge_req,
190 'Logging in', 'Unable to log in')
83317f69 191
3995d37d 192 if challenge_results is False:
e00eb564 193 return
83317f69 194
3995d37d
S
195 login_res = try_get(challenge_results, lambda x: x[0][5], list)
196 if login_res:
197 login_msg = try_get(login_res, lambda x: x[5], compat_str)
198 warn(
199 'Unable to login: %s' % 'Invalid password'
200 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
201 return False
202
203 res = try_get(challenge_results, lambda x: x[0][-1], list)
204 if not res:
205 warn('Unable to extract result entry')
206 return False
207
9a6628aa
S
208 login_challenge = try_get(res, lambda x: x[0][0], list)
209 if login_challenge:
210 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
211 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
212 # SEND_SUCCESS - TFA code has been successfully sent to phone
213 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 214 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
215 if status == 'QUOTA_EXCEEDED':
216 warn('Exceeded the limit of TFA codes, try later')
217 return False
218
219 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
220 if not tl:
221 warn('Unable to extract TL')
222 return False
223
224 tfa_code = self._get_tfa_info('2-step verification code')
225
226 if not tfa_code:
227 warn(
228 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
229 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
230 return False
231
232 tfa_code = remove_start(tfa_code, 'G-')
233
234 tfa_req = [
235 user_hash, None, 2, None,
236 [
237 9, None, None, None, None, None, None, None,
238 [None, tfa_code, True, 2]
239 ]]
240
241 tfa_results = req(
242 self._TFA_URL.format(tl), tfa_req,
243 'Submitting TFA code', 'Unable to submit TFA code')
244
245 if tfa_results is False:
246 return False
247
248 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
249 if tfa_res:
250 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
251 warn(
252 'Unable to finish TFA: %s' % 'Invalid TFA code'
253 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
254 return False
255
256 check_cookie_url = try_get(
257 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
258 else:
259 CHALLENGES = {
260 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
261 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
262 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
263 }
264 challenge = CHALLENGES.get(
265 challenge_str,
266 '%s returned error %s.' % (self.IE_NAME, challenge_str))
267 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
268 return False
3995d37d
S
269 else:
270 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
271
272 if not check_cookie_url:
273 warn('Unable to extract CheckCookie URL')
274 return False
e00eb564
S
275
276 check_cookie_results = self._download_webpage(
3995d37d
S
277 check_cookie_url, None, 'Checking cookie', fatal=False)
278
279 if check_cookie_results is False:
280 return False
e00eb564 281
3995d37d
S
282 if 'https://myaccount.google.com/' not in check_cookie_results:
283 warn('Unable to log in')
b2e8bc1b 284 return False
e00eb564 285
b2e8bc1b 286 return True
2d6659b9 287 '''
b2e8bc1b 288
cce889b9 289 def _initialize_consent(self):
290 cookies = self._get_cookies('https://www.youtube.com/')
291 if cookies.get('__Secure-3PSID'):
292 return
293 consent_id = None
294 consent = cookies.get('CONSENT')
295 if consent:
296 if 'YES' in consent.value:
297 return
298 consent_id = self._search_regex(
299 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
300 if not consent_id:
301 consent_id = random.randint(100, 999)
302 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 303
b2e8bc1b 304 def _real_initialize(self):
cce889b9 305 self._initialize_consent()
b2e8bc1b
JMF
306 if self._downloader is None:
307 return
b2e8bc1b
JMF
308 if not self._login():
309 return
c5e8d7af 310
a0566bbf 311 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 312 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
313 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 314
109dd3b2 315 _YT_DEFAULT_YTCFGS = {
316 'WEB': {
317 'INNERTUBE_API_VERSION': 'v1',
318 'INNERTUBE_CLIENT_NAME': 'WEB',
319 'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
320 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
321 'INNERTUBE_CONTEXT': {
322 'client': {
323 'clientName': 'WEB',
324 'clientVersion': '2.20210622.10.00',
325 'hl': 'en',
326 }
327 },
328 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
329 },
c0bc527b
M
330 'WEB_AGEGATE': {
331 'INNERTUBE_API_VERSION': 'v1',
332 'INNERTUBE_CLIENT_NAME': 'WEB',
333 'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
334 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
335 'INNERTUBE_CONTEXT': {
336 'client': {
337 'clientName': 'WEB',
338 'clientVersion': '2.20210622.10.00',
339 'clientScreen': 'EMBED',
340 'hl': 'en',
341 }
342 },
343 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
344 },
109dd3b2 345 'WEB_REMIX': {
346 'INNERTUBE_API_VERSION': 'v1',
347 'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
348 'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
349 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
350 'INNERTUBE_CONTEXT': {
351 'client': {
352 'clientName': 'WEB_REMIX',
353 'clientVersion': '1.20210621.00.00',
354 'hl': 'en',
355 }
356 },
357 'INNERTUBE_CONTEXT_CLIENT_NAME': 67
358 },
359 'WEB_EMBEDDED_PLAYER': {
360 'INNERTUBE_API_VERSION': 'v1',
361 'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
362 'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
363 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
364 'INNERTUBE_CONTEXT': {
365 'client': {
366 'clientName': 'WEB_EMBEDDED_PLAYER',
367 'clientVersion': '1.20210620.0.1',
368 'hl': 'en',
369 }
370 },
371 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
372 },
373 'ANDROID': {
374 'INNERTUBE_API_VERSION': 'v1',
375 'INNERTUBE_CLIENT_NAME': 'ANDROID',
376 'INNERTUBE_CLIENT_VERSION': '16.20',
377 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
378 'INNERTUBE_CONTEXT': {
379 'client': {
380 'clientName': 'ANDROID',
381 'clientVersion': '16.20',
382 'hl': 'en',
383 }
384 },
fe93e2c4 385 'INNERTUBE_CONTEXT_CLIENT_NAME': 3
109dd3b2 386 },
c0bc527b
M
387 'ANDROID_AGEGATE': {
388 'INNERTUBE_API_VERSION': 'v1',
389 'INNERTUBE_CLIENT_NAME': 'ANDROID',
390 'INNERTUBE_CLIENT_VERSION': '16.20',
391 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
392 'INNERTUBE_CONTEXT': {
393 'client': {
394 'clientName': 'ANDROID',
395 'clientVersion': '16.20',
396 'clientScreen': 'EMBED',
397 'hl': 'en',
398 }
399 },
400 'INNERTUBE_CONTEXT_CLIENT_NAME': 3
401 },
109dd3b2 402 'ANDROID_EMBEDDED_PLAYER': {
403 'INNERTUBE_API_VERSION': 'v1',
404 'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
405 'INNERTUBE_CLIENT_VERSION': '16.20',
406 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
407 'INNERTUBE_CONTEXT': {
408 'client': {
409 'clientName': 'ANDROID_EMBEDDED_PLAYER',
410 'clientVersion': '16.20',
411 'hl': 'en',
412 }
413 },
fe93e2c4 414 'INNERTUBE_CONTEXT_CLIENT_NAME': 55
109dd3b2 415 },
416 'ANDROID_MUSIC': {
417 'INNERTUBE_API_VERSION': 'v1',
418 'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
419 'INNERTUBE_CLIENT_VERSION': '4.32',
420 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
421 'INNERTUBE_CONTEXT': {
422 'client': {
423 'clientName': 'ANDROID_MUSIC',
424 'clientVersion': '4.32',
425 'hl': 'en',
426 }
427 },
fe93e2c4 428 'INNERTUBE_CONTEXT_CLIENT_NAME': 21
11f9be09 429 },
430 'IOS': {
431 'INNERTUBE_API_VERSION': 'v1',
432 'INNERTUBE_CLIENT_NAME': 'IOS',
433 'INNERTUBE_CLIENT_VERSION': '16.20',
434 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
435 'INNERTUBE_CONTEXT': {
436 'client': {
437 'clientName': 'IOS',
438 'clientVersion': '16.20',
439 'hl': 'en',
440 }
441 },
442 'INNERTUBE_CONTEXT_CLIENT_NAME': 5
c0bc527b
M
443 },
444 'IOS_AGEGATE': {
445 'INNERTUBE_API_VERSION': 'v1',
446 'INNERTUBE_CLIENT_NAME': 'IOS',
447 'INNERTUBE_CLIENT_VERSION': '16.20',
448 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
449 'INNERTUBE_CONTEXT': {
450 'client': {
451 'clientName': 'IOS',
452 'clientVersion': '16.20',
453 'clientScreen': 'EMBED',
454 'hl': 'en',
455 }
456 },
457 'INNERTUBE_CONTEXT_CLIENT_NAME': 5
11f9be09 458 },
459 'IOS_MUSIC': {
460 'INNERTUBE_API_VERSION': 'v1',
461 'INNERTUBE_CLIENT_NAME': 'IOS_MUSIC',
462 'INNERTUBE_CLIENT_VERSION': '4.32',
463 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
464 'INNERTUBE_CONTEXT': {
465 'client': {
466 'clientName': 'IOS_MUSIC',
467 'clientVersion': '4.32',
468 'hl': 'en',
469 }
470 },
471 'INNERTUBE_CONTEXT_CLIENT_NAME': 26
472 },
473 'IOS_MESSAGES_EXTENSION': {
474 'INNERTUBE_API_VERSION': 'v1',
475 'INNERTUBE_CLIENT_NAME': 'IOS_MESSAGES_EXTENSION',
476 'INNERTUBE_CLIENT_VERSION': '16.20',
477 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
478 'INNERTUBE_CONTEXT': {
479 'client': {
480 'clientName': 'IOS_MESSAGES_EXTENSION',
481 'clientVersion': '16.20',
482 'hl': 'en',
483 }
484 },
485 'INNERTUBE_CONTEXT_CLIENT_NAME': 66
5a1fc62b 486 },
487 'MWEB': {
488 'INNERTUBE_API_VERSION': 'v1',
489 'INNERTUBE_CLIENT_NAME': 'MWEB',
490 'INNERTUBE_CLIENT_VERSION': '2.20210721.07.00',
491 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
492 'INNERTUBE_CONTEXT': {
493 'client': {
494 'clientName': 'MWEB',
495 'clientVersion': '2.20210721.07.00',
496 'hl': 'en',
497 }
498 },
499 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
500 },
c0bc527b
M
501 'MWEB_AGEGATE': {
502 'INNERTUBE_API_VERSION': 'v1',
503 'INNERTUBE_CLIENT_NAME': 'MWEB',
504 'INNERTUBE_CLIENT_VERSION': '2.20210721.07.00',
505 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
506 'INNERTUBE_CONTEXT': {
507 'client': {
508 'clientName': 'MWEB',
509 'clientVersion': '2.20210721.07.00',
510 'clientScreen': 'EMBED',
511 'hl': 'en',
512 }
513 },
514 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
515 },
109dd3b2 516 }
517
518 _YT_DEFAULT_INNERTUBE_HOSTS = {
519 'DIRECT': 'youtubei.googleapis.com',
520 'WEB': 'www.youtube.com',
521 'WEB_REMIX': 'music.youtube.com',
522 'ANDROID_MUSIC': 'music.youtube.com'
523 }
524
11f9be09 525 # clients starting with _ cannot be explicitly requested by the user
526 _YT_CLIENTS = {
11f9be09 527 'android': 'ANDROID',
528 'android_music': 'ANDROID_MUSIC',
c0bc527b
M
529 'android_embedded': 'ANDROID_EMBEDDED_PLAYER',
530 'android_agegate': 'ANDROID_AGEGATE',
11f9be09 531 'ios': 'IOS',
532 'ios_music': 'IOS_MUSIC',
c0bc527b
M
533 'ios_embedded': 'IOS_MESSAGES_EXTENSION',
534 'ios_agegate': 'IOS_AGEGATE',
b4c055ba 535 'web': 'WEB',
536 'web_music': 'WEB_REMIX',
c0bc527b
M
537 'web_embedded': 'WEB_EMBEDDED_PLAYER',
538 'web_agegate': 'WEB_AGEGATE',
539 'mweb': 'MWEB',
540 'mweb_agegate': 'MWEB_AGEGATE',
11f9be09 541 }
542
109dd3b2 543 def _get_default_ytcfg(self, client='WEB'):
544 if client in self._YT_DEFAULT_YTCFGS:
545 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
546 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
547 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
548
def _get_innertube_host(self, client='WEB'):
    """Look up the InnerTube hostname for *client* via dict_get, with 'WEB' as the second candidate key."""
    candidate_keys = (client, 'WEB')
    return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, candidate_keys)
551
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
    """
    Apply try_get(ytcfg, getter, expected_type); when that yields a falsy
    value, apply the same lookup to the default ytcfg of *default_client*.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
556
def _extract_client_name(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_NAME from *ytcfg*, falling back to the defaults of *default_client*."""
    def client_name(cfg):
        return cfg['INNERTUBE_CLIENT_NAME']

    return self._ytcfg_get_safe(ytcfg, client_name, compat_str, default_client)
559
@staticmethod
def _extract_session_index(*data):
    """
    Return the first SESSION_INDEX among the given ytcfg objects for which
    int_or_none gives a non-None result; None when there is no such value.
    """
    indexes = (
        int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX']))
        for cfg in data)
    return next((index for index in indexes if index is not None), None)
314ee305 566
def _extract_client_version(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_VERSION from *ytcfg*, falling back to the defaults of *default_client*."""
    def client_version(cfg):
        return cfg['INNERTUBE_CLIENT_VERSION']

    return self._ytcfg_get_safe(ytcfg, client_version, compat_str, default_client)
569
def _extract_api_key(self, ytcfg=None, default_client='WEB'):
    """Return INNERTUBE_API_KEY from *ytcfg*, falling back to the defaults of *default_client*."""
    def api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key, compat_str, default_client)
572
def _extract_context(self, ytcfg=None, default_client='WEB'):
    """
    Return the INNERTUBE_CONTEXT dict to send with API requests.

    The context found in *ytcfg* is returned as-is when present. Otherwise
    the default context of *default_client* is used; if a ytcfg was given,
    its client name/version (and VISITOR_DATA, when set) are written into
    the default context's 'client' entry.
    """
    def context_of(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    page_context = context_of(ytcfg)
    if page_context:
        return page_context

    fallback = context_of(self._get_default_ytcfg(default_client))
    if ytcfg:
        # Recreate the client context (required)
        client = fallback['client']
        client['clientVersion'] = self._extract_client_version(ytcfg, default_client)
        client['clientName'] = self._extract_client_name(ytcfg, default_client)
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            client['visitorData'] = visitor_data
    return fallback
592
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value for *origin*.

    Returns None when neither a __Secure-3PAPISID nor a SAPISID cookie with
    a value is found for https://www.youtube.com.
    """
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    yt_cookies = self._get_cookies('https://www.youtube.com')
    sapisid_cookie = dict_get(yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
    if not (sapisid_cookie is not None and sapisid_cookie.value):
        return None

    timestamp = round(time.time())
    # SAPISID cookie is required if not already present
    if not yt_cookies.get('SAPISID'):
        self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie', only_once=True)
        self._set_cookie(
            '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=timestamp + 3600)
    self.write_debug('Extracted SAPISID cookie', only_once=True)

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = '%s %s %s' % (timestamp, sapisid_cookie.value, origin)
    digest = hashlib.sha1(payload.encode('utf-8')).hexdigest()
    return 'SAPISIDHASH %s_%s' % (timestamp, digest)
a5c56234
M
612
613 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 614 note='Downloading API JSON', errnote='Unable to download API page',
109dd3b2 615 context=None, api_key=None, api_hostname=None, default_client='WEB'):
f4f751af 616
109dd3b2 617 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 618 data.update(query)
11f9be09 619 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 620 real_headers.update({'content-type': 'application/json'})
621 if headers:
622 real_headers.update(headers)
545cc85d 623 return self._download_json(
109dd3b2 624 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 625 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 626 data=json.dumps(data).encode('utf8'), headers=real_headers,
627 query={'key': api_key or self._extract_api_key()})
628
def extract_yt_initial_data(self, video_id, webpage):
    """
    Find the ytInitialData JSON in *webpage* and return it parsed.

    The pattern followed by a boundary marker is tried first, then the bare pattern.
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE)
    raw_json = self._search_regex(patterns, webpage, 'yt initial data')
    return self._parse_json(raw_json, video_id)
0c148415 635
def _extract_identity_token(self, webpage, item_id):
    """
    Return the ID_TOKEN for *webpage*, or None.

    The token is read from the page's ytcfg first; failing that, it is
    searched for directly in the page text.
    """
    if not webpage:
        return None
    ytcfg = self.extract_ytcfg(item_id, webpage)
    cfg_token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if cfg_token:
        return cfg_token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
647
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the first DELEGATED_SESSION_ID or channel syncid found; None otherwise
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'])
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated:
            return delegated
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split("||")
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
a1c5d2ca 666
def extract_ytcfg(self, video_id, webpage):
    """
    Return the dict passed to ytcfg.set(...) in *webpage*.

    An empty dict is returned for an empty page, and whenever the parsed
    result is falsy.
    """
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    parsed = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed or {}
674
def generate_api_headers(
        self, ytcfg=None, identity_token=None, account_syncid=None,
        visitor_data=None, api_hostname=None, default_client='WEB', session_index=None):
    """
    Build the HTTP headers for an InnerTube API request.

    Client name/version and Origin are always set. Identity token, page id,
    auth-user index, visitor id and the SAPISIDHASH authorization headers
    are added only when the corresponding value is available; values not
    passed in are looked up in *ytcfg* where the code below does so.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(default_client))
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
    }

    if ytcfg and not visitor_data:
        visitor_data = try_get(
            self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid

    if ytcfg and session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Index 0 is sent when a page id is given without a known session index
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data

    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        headers['Authorization'] = authorization
        headers['X-Origin'] = origin
    return headers
29f7c58a 703
2d6659b9 704 @staticmethod
705 def _build_api_continuation_query(continuation, ctp=None):
706 query = {
707 'continuation': continuation
708 }
709 # TODO: Inconsistency with clickTrackingParams.
710 # Currently we have a fixed ctp contained within context (from ytcfg)
711 # and a ctp in root query for continuation.
712 if ctp:
713 query['clickTracking'] = {'clickTrackingParams': ctp}
714 return query
715
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's nextContinuationData or
    reloadContinuationData; returns None when no continuation token is found.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
2d6659b9 728
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    Returns None when *continuation_ep* is not a dict or has no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
2d6659b9 738
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for *renderer*, or None.

    The renderer's own continuation data is preferred; otherwise the first
    continuationItemRenderer among its 'contents' and 'items' that yields a
    token is used.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'])
    for entry in entries:
        if isinstance(entry, dict):
            endpoint = try_get(entry, endpoint_getters, dict)
            found = cls._extract_continuation_ep_data(endpoint)
            if found:
                return found
759
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) pairs for the entries of data['alerts'].

    Entries that are not dicts, whose values are not dicts, or that lack a
    type or a text message are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # The response is remote data: skip anything that is not an alert object
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
772
773 def _report_alerts(self, alerts, expected=True):
774 errors = []
775 warnings = []
776 for alert_type, alert_message in alerts:
777 if alert_type.lower() == 'error':
778 errors.append([alert_type, alert_message])
779 else:
780 warnings.append([alert_type, alert_message])
781
782 for alert_type, alert_message in (warnings + errors[:-1]):
783 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
784 if errors:
785 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
786
787 def _extract_and_report_alerts(self, data, *args, **kwargs):
788 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
789
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels found in renderer['badges']."""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}
797
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in *data* at any of *path_list*.

    For each candidate object, its 'simpleText' is preferred; otherwise the
    'text' values of its 'runs' (or of the object itself, if it is a list)
    are joined, using at most *max_runs* runs when that is set.
    With no paths, *data* itself is examined. Returns None if nothing is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                # A non-branching path gives one object rather than a list of them
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            limit = max_runs or len(runs)
            runs = runs[:min(len(runs), limit)]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
47193e02 819
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='WEB'):
    """
    Call the API endpoint *ep* with retries and return the JSON response.

    A request is retried (up to the 'extractor_retries' parameter, default 3)
    after a network error other than HTTP 403/429, and when none of
    *check_get_keys* is found in the response by dict_get.
    With fatal=False, failures are reported as warnings and None is returned.
    """
    response = None
    last_error = None
    attempt = -1
    retries = self.get_param('extractor_retries', 3)
    required_keys = [] if check_get_keys is None else check_get_keys
    while attempt < retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, retry_suffix))
        except ExtractorError as e:
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            retryable = isinstance(e.cause, network_exceptions) and (
                not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429))
            if retryable:
                last_error = error_to_compat_str(e.cause or e)
                if attempt < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return
        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not required_keys or dict_get(response, required_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if attempt >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                return
    return response
878
@staticmethod
def is_music_url(url):
    """Return True when *url* starts with ``http(s)://music.youtube.com/``."""
    music_prefix = r'https?://music\.youtube\.com/'
    return bool(re.match(music_prefix, url))
882
def _extract_video(self, renderer):
    """Build a ``url``-type result dict for ``YoutubeIE`` from a video renderer.

    Reads ``videoId`` from *renderer* and pulls the title, description
    snippet, duration, view count and uploader through ``self._get_text``.
    """
    video_id = renderer.get('videoId')
    info = {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
    }
    info['title'] = self._get_text(renderer, 'title')
    info['description'] = self._get_text(renderer, 'descriptionSnippet')

    # Duration text comes from 'lengthText', with the thumbnail overlay as the second lookup path
    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    info['duration'] = parse_duration(length_text)

    # Strip all whitespace, then keep only the leading run of digits and commas
    raw_views = self._get_text(renderer, 'viewCountText') or ''
    compact_views = re.sub(r'\s', '', raw_views)
    leading_number = self._search_regex(
        r'^([\d,]+)', compact_views, 'view count', default=None)
    info['view_count'] = str_to_int(leading_number)

    info['uploader'] = self._get_text(renderer, 'ownerText', 'shortBylineText')
    return info
907
0c148415 908
360e1ca5 909class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 910 IE_DESC = 'YouTube.com'
bc2ca1bb 911 _INVIDIOUS_SITES = (
912 # invidious-redirect websites
913 r'(?:www\.)?redirect\.invidious\.io',
914 r'(?:(?:www|dev)\.)?invidio\.us',
915 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
916 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 917 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 918 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 919 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 920 # youtube-dl invidious instances list
921 r'(?:(?:www|no)\.)?invidiou\.sh',
922 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
923 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 924 r'(?:www\.)?invidious\.mastodon\.host',
925 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 926 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 927 r'(?:www\.)?invidious\.tinfoil-hat\.net',
928 r'(?:www\.)?invidious\.himiko\.cloud',
929 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 930 r'(?:www\.)?invidious\.tube',
931 r'(?:www\.)?invidiou\.site',
932 r'(?:www\.)?invidious\.site',
933 r'(?:www\.)?invidious\.xyz',
934 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 935 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 936 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 937 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 938 r'(?:www\.)?tube\.poal\.co',
939 r'(?:www\.)?tube\.connect\.cafe',
940 r'(?:www\.)?vid\.wxzm\.sx',
941 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 942 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 943 r'(?:www\.)?yewtu\.be',
944 r'(?:www\.)?yt\.elukerio\.org',
945 r'(?:www\.)?yt\.lelux\.fi',
946 r'(?:www\.)?invidious\.ggc-project\.de',
947 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 948 r'(?:www\.)?ytprivate\.com',
949 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 950 r'(?:www\.)?invidious\.toot\.koeln',
951 r'(?:www\.)?invidious\.fdn\.fr',
952 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 953 r'(?:www\.)?invidious\.namazso\.eu',
954 r'(?:www\.)?invidious\.silkky\.cloud',
955 r'(?:www\.)?invidious\.exonip\.de',
956 r'(?:www\.)?invidious\.riverside\.rocks',
957 r'(?:www\.)?invidious\.blamefran\.net',
958 r'(?:www\.)?invidious\.moomoo\.de',
959 r'(?:www\.)?ytb\.trom\.tf',
960 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 961 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
962 r'(?:www\.)?qklhadlycap4cnod\.onion',
963 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
964 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
965 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
966 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
967 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
968 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 969 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
970 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
971 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
972 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 973 )
cb7dfeea 974 _VALID_URL = r"""(?x)^
c5e8d7af 975 (
edb53e2d 976 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 977 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
978 (?:www\.)?deturl\.com/www\.youtube\.com|
979 (?:www\.)?pwnyoutube\.com|
980 (?:www\.)?hooktube\.com|
981 (?:www\.)?yourepeat\.com|
982 tube\.majestyc\.net|
983 %(invidious)s|
984 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
985 (?:.*?\#/)? # handle anchor (#/) redirect urls
986 (?: # the various things that can precede the ID:
ac7553d0 987 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 988 |(?: # or the v= param in all its forms
f7000f3a 989 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 990 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 991 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
992 v=
993 )
f4b05232 994 ))
cbaed4bb
S
995 |(?:
996 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
997 vid\.plus| # or vid.plus/xxxx
998 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 999 %(invidious)s
cbaed4bb 1000 )/
edb53e2d 1001 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 1002 )
c5e8d7af 1003 )? # all until now is optional -> you can pass the naked ID
201c1459 1004 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 1005 (?(1).+)? # if we found the ID, everything can follow
9297939e 1006 (?:\#|$)""" % {
bc2ca1bb 1007 'invidious': '|'.join(_INVIDIOUS_SITES),
1008 }
e40c758c 1009 _PLAYER_INFO_RE = (
cc2db878 1010 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1011 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 1012 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 1013 )
2c62dc26 1014 _formats = {
c2d3cb4c 1015 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1016 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1017 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1018 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1019 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1020 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1021 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1022 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 1023 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 1024 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1025 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1026 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1027 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1028 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1029 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 1030 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 1031 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1032 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 1033
1034
1035 # 3D videos
c2d3cb4c 1036 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1037 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1038 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1039 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 1040 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1041 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1042 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 1043
96fb5605 1044 # Apple HTTP Live Streaming
11f12195 1045 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 1046 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1047 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1048 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1049 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1050 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 1051 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1052 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
1053
1054 # DASH mp4 video
d23028a8
S
1055 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1056 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1057 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1058 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1059 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 1060 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
1061 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1062 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1063 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1064 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1065 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1066 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1067
f6f1fc92 1068 # Dash mp4 audio
d23028a8
S
1069 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1070 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1071 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1072 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1073 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1074 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1075 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1076
1077 # Dash webm
d23028a8
S
1078 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1079 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1080 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1081 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1082 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1083 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1084 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1085 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1086 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1087 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1088 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1089 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1090 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1091 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1092 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1093 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1094 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1095 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1096 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1097 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1098 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1099 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1100
1101 # Dash webm audio
d23028a8
S
1102 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1103 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1104
0857baad 1105 # Dash webm audio with opus inside
d23028a8
S
1106 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1107 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1108 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1109
ce6b9a2d
PH
1110 # RTMP (unnamed)
1111 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1112
1113 # av01 video only formats sometimes served with "unknown" codecs
1114 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1115 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1116 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1117 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 1118 }
29f7c58a 1119 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1120
109dd3b2 1121 _AGE_GATE_REASONS = (
1122 'Sign in to confirm your age',
1123 'This video may be inappropriate for some users.',
1124 'Sorry, this content is age-restricted.')
1125
fd5c4aab
S
1126 _GEO_BYPASS = False
1127
78caa52a 1128 IE_NAME = 'youtube'
2eb88d95
PH
1129 _TESTS = [
1130 {
2d3d2997 1131 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1132 'info_dict': {
1133 'id': 'BaW_jenozKc',
1134 'ext': 'mp4',
3867038a 1135 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1136 'uploader': 'Philipp Hagemeister',
1137 'uploader_id': 'phihag',
ec85ded8 1138 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
1139 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1140 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1141 'upload_date': '20121002',
3867038a 1142 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 1143 'categories': ['Science & Technology'],
3867038a 1144 'tags': ['youtube-dl'],
556dbe7f 1145 'duration': 10,
dbdaaa23 1146 'view_count': int,
3e7c1224
PH
1147 'like_count': int,
1148 'dislike_count': int,
7c80519c 1149 'start_time': 1,
297a564b 1150 'end_time': 9,
2eb88d95 1151 }
0e853ca4 1152 },
fccd3771 1153 {
4bc3a23e
PH
1154 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1155 'note': 'Embed-only video (#1746)',
1156 'info_dict': {
1157 'id': 'yZIXLfi8CZQ',
1158 'ext': 'mp4',
1159 'upload_date': '20120608',
1160 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1161 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1162 'uploader': 'SET India',
94bfcd23 1163 'uploader_id': 'setindia',
ec85ded8 1164 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1165 'age_limit': 18,
545cc85d 1166 },
1167 'skip': 'Private video',
fccd3771 1168 },
11b56058 1169 {
8bdd16b4 1170 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1171 'note': 'Use the first video ID in the URL',
1172 'info_dict': {
1173 'id': 'BaW_jenozKc',
1174 'ext': 'mp4',
3867038a 1175 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1176 'uploader': 'Philipp Hagemeister',
1177 'uploader_id': 'phihag',
ec85ded8 1178 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1179 'upload_date': '20121002',
3867038a 1180 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1181 'categories': ['Science & Technology'],
3867038a 1182 'tags': ['youtube-dl'],
556dbe7f 1183 'duration': 10,
dbdaaa23 1184 'view_count': int,
11b56058
PM
1185 'like_count': int,
1186 'dislike_count': int,
34a7de29
S
1187 },
1188 'params': {
1189 'skip_download': True,
1190 },
11b56058 1191 },
dd27fd17 1192 {
2d3d2997 1193 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1194 'note': '256k DASH audio (format 141) via DASH manifest',
1195 'info_dict': {
1196 'id': 'a9LDPn-MO4I',
1197 'ext': 'm4a',
1198 'upload_date': '20121002',
1199 'uploader_id': '8KVIDEO',
ec85ded8 1200 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1201 'description': '',
1202 'uploader': '8KVIDEO',
1203 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1204 },
4bc3a23e
PH
1205 'params': {
1206 'youtube_include_dash_manifest': True,
1207 'format': '141',
4919603f 1208 },
de3c7fe0 1209 'skip': 'format 141 not served anymore',
dd27fd17 1210 },
8bdd16b4 1211 # DASH manifest with encrypted signature
1212 {
1213 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1214 'info_dict': {
1215 'id': 'IB3lcPjvWLA',
1216 'ext': 'm4a',
1217 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1218 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1219 'duration': 244,
1220 'uploader': 'AfrojackVEVO',
1221 'uploader_id': 'AfrojackVEVO',
1222 'upload_date': '20131011',
cc2db878 1223 'abr': 129.495,
8bdd16b4 1224 },
1225 'params': {
1226 'youtube_include_dash_manifest': True,
1227 'format': '141/bestaudio[ext=m4a]',
1228 },
1229 },
dd2d55f1 1230 # Normal age-gate video (embed allowed)
c522adb1 1231 {
2d3d2997 1232 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1233 'info_dict': {
1234 'id': 'HtVdAasjOgU',
1235 'ext': 'mp4',
1236 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1237 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1238 'duration': 142,
c522adb1
JMF
1239 'uploader': 'The Witcher',
1240 'uploader_id': 'WitcherGame',
ec85ded8 1241 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1242 'upload_date': '20140605',
34952f09 1243 'age_limit': 18,
c522adb1
JMF
1244 },
1245 },
8bdd16b4 1246 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1247 # YouTube Red ad is not captured for creator
1248 {
1249 'url': '__2ABJjxzNo',
1250 'info_dict': {
1251 'id': '__2ABJjxzNo',
1252 'ext': 'mp4',
1253 'duration': 266,
1254 'upload_date': '20100430',
1255 'uploader_id': 'deadmau5',
1256 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1257 'creator': 'deadmau5',
1258 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1259 'uploader': 'deadmau5',
1260 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1261 'alt_title': 'Some Chords',
8bdd16b4 1262 },
1263 'expected_warnings': [
1264 'DASH manifest missing',
1265 ]
1266 },
067aa17e 1267 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1268 {
1269 'url': 'lqQg6PlCWgI',
1270 'info_dict': {
1271 'id': 'lqQg6PlCWgI',
1272 'ext': 'mp4',
556dbe7f 1273 'duration': 6085,
90227264 1274 'upload_date': '20150827',
cbe2bd91 1275 'uploader_id': 'olympic',
ec85ded8 1276 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1277 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1278 'uploader': 'Olympics',
cbe2bd91
PH
1279 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1280 },
1281 'params': {
1282 'skip_download': 'requires avconv',
e52a40ab 1283 }
cbe2bd91 1284 },
6271f1ca
PH
1285 # Non-square pixels
1286 {
1287 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1288 'info_dict': {
1289 'id': '_b-2C3KPAM0',
1290 'ext': 'mp4',
1291 'stretched_ratio': 16 / 9.,
556dbe7f 1292 'duration': 85,
6271f1ca
PH
1293 'upload_date': '20110310',
1294 'uploader_id': 'AllenMeow',
ec85ded8 1295 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1296 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1297 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1298 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1299 },
06b491eb
S
1300 },
1301 # url_encoded_fmt_stream_map is empty string
1302 {
1303 'url': 'qEJwOuvDf7I',
1304 'info_dict': {
1305 'id': 'qEJwOuvDf7I',
f57b7835 1306 'ext': 'webm',
06b491eb
S
1307 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1308 'description': '',
1309 'upload_date': '20150404',
1310 'uploader_id': 'spbelect',
1311 'uploader': 'Наблюдатели Петербурга',
1312 },
1313 'params': {
1314 'skip_download': 'requires avconv',
e323cf3f
S
1315 },
1316 'skip': 'This live event has ended.',
06b491eb 1317 },
067aa17e 1318 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1319 {
1320 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1321 'info_dict': {
1322 'id': 'FIl7x6_3R5Y',
eb6793ba 1323 'ext': 'webm',
da77d856
S
1324 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1325 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1326 'duration': 220,
da77d856
S
1327 'upload_date': '20150625',
1328 'uploader_id': 'dorappi2000',
ec85ded8 1329 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1330 'uploader': 'dorappi2000',
eb6793ba 1331 'formats': 'mincount:31',
da77d856 1332 },
eb6793ba 1333 'skip': 'not actual anymore',
2ee8f5d8 1334 },
8a1a26ce
YCH
1335 # DASH manifest with segment_list
1336 {
1337 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1338 'md5': '8ce563a1d667b599d21064e982ab9e31',
1339 'info_dict': {
1340 'id': 'CsmdDsKjzN8',
1341 'ext': 'mp4',
17ee98e1 1342 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1343 'uploader': 'Airtek',
1344 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1345 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1346 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1347 },
1348 'params': {
1349 'youtube_include_dash_manifest': True,
1350 'format': '135', # bestvideo
be49068d
S
1351 },
1352 'skip': 'This live event has ended.',
2ee8f5d8 1353 },
cf7e015f
S
1354 {
1355 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1356 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1357 'info_dict': {
545cc85d 1358 'id': 'jvGDaLqkpTg',
1359 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1360 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1361 },
1362 'playlist': [{
1363 'info_dict': {
545cc85d 1364 'id': 'jvGDaLqkpTg',
cf7e015f 1365 'ext': 'mp4',
545cc85d 1366 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1367 'description': 'md5:e03b909557865076822aa169218d6a5d',
1368 'duration': 10643,
1369 'upload_date': '20161111',
1370 'uploader': 'Team PGP',
1371 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1372 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1373 },
1374 }, {
1375 'info_dict': {
545cc85d 1376 'id': '3AKt1R1aDnw',
cf7e015f 1377 'ext': 'mp4',
545cc85d 1378 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1379 'description': 'md5:e03b909557865076822aa169218d6a5d',
1380 'duration': 10991,
1381 'upload_date': '20161111',
1382 'uploader': 'Team PGP',
1383 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1384 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1385 },
1386 }, {
1387 'info_dict': {
545cc85d 1388 'id': 'RtAMM00gpVc',
cf7e015f 1389 'ext': 'mp4',
545cc85d 1390 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1391 'description': 'md5:e03b909557865076822aa169218d6a5d',
1392 'duration': 10995,
1393 'upload_date': '20161111',
1394 'uploader': 'Team PGP',
1395 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1396 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1397 },
1398 }, {
1399 'info_dict': {
545cc85d 1400 'id': '6N2fdlP3C5U',
cf7e015f 1401 'ext': 'mp4',
545cc85d 1402 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1403 'description': 'md5:e03b909557865076822aa169218d6a5d',
1404 'duration': 10990,
1405 'upload_date': '20161111',
1406 'uploader': 'Team PGP',
1407 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1408 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1409 },
1410 }],
1411 'params': {
1412 'skip_download': True,
1413 },
cbaed4bb 1414 },
f9f49d87 1415 {
067aa17e 1416 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1417 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1418 'info_dict': {
1419 'id': 'gVfLd0zydlo',
1420 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1421 },
1422 'playlist_count': 2,
be49068d 1423 'skip': 'Not multifeed anymore',
f9f49d87 1424 },
cbaed4bb 1425 {
2d3d2997 1426 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1427 'only_matching': True,
0e49d9a6 1428 },
6d4fc66b 1429 {
2d3d2997 1430 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1431 'only_matching': True,
1432 },
0e49d9a6 1433 {
067aa17e 1434 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1435 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1436 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1437 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1438 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1439 'info_dict': {
1440 'id': 'lsguqyKfVQg',
1441 'ext': 'mp4',
1442 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1443 'alt_title': 'Dark Walk',
0e49d9a6 1444 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1445 'duration': 133,
0e49d9a6
LL
1446 'upload_date': '20151119',
1447 'uploader_id': 'IronSoulElf',
ec85ded8 1448 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1449 'uploader': 'IronSoulElf',
11f9be09 1450 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1451 'track': 'Dark Walk',
1452 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1453 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1454 },
1455 'params': {
1456 'skip_download': True,
1457 },
1458 },
61f92af1 1459 {
067aa17e 1460 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1461 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1462 'only_matching': True,
1463 },
313dfc45
LL
1464 {
1465 # Video with yt:stretch=17:0
1466 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1467 'info_dict': {
1468 'id': 'Q39EVAstoRM',
1469 'ext': 'mp4',
1470 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1471 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1472 'upload_date': '20151107',
1473 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1474 'uploader': 'CH GAMER DROID',
1475 },
1476 'params': {
1477 'skip_download': True,
1478 },
be49068d 1479 'skip': 'This video does not exist.',
313dfc45 1480 },
201c1459 1481 {
1482 # Video with incomplete 'yt:stretch=16:'
1483 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1484 'only_matching': True,
1485 },
7caf9830
S
1486 {
1487 # Video licensed under Creative Commons
1488 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1489 'info_dict': {
1490 'id': 'M4gD1WSo5mA',
1491 'ext': 'mp4',
1492 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1493 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1494 'duration': 721,
7caf9830
S
1495 'upload_date': '20150127',
1496 'uploader_id': 'BerkmanCenter',
ec85ded8 1497 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1498 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1499 'license': 'Creative Commons Attribution license (reuse allowed)',
1500 },
1501 'params': {
1502 'skip_download': True,
1503 },
1504 },
fd050249
S
1505 {
1506 # Channel-like uploader_url
1507 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1508 'info_dict': {
1509 'id': 'eQcmzGIKrzg',
1510 'ext': 'mp4',
1511 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1512 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1513 'duration': 4060,
fd050249 1514 'upload_date': '20151119',
eb6793ba 1515 'uploader': 'Bernie Sanders',
fd050249 1516 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1517 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1518 'license': 'Creative Commons Attribution license (reuse allowed)',
1519 },
1520 'params': {
1521 'skip_download': True,
1522 },
1523 },
040ac686
S
1524 {
1525 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1526 'only_matching': True,
7f29cf54
S
1527 },
1528 {
067aa17e 1529 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1530 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1531 'only_matching': True,
6496ccb4
S
1532 },
1533 {
1534 # Rental video preview
1535 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1536 'info_dict': {
1537 'id': 'uGpuVWrhIzE',
1538 'ext': 'mp4',
1539 'title': 'Piku - Trailer',
1540 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1541 'upload_date': '20150811',
1542 'uploader': 'FlixMatrix',
1543 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1544 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1545 'license': 'Standard YouTube License',
1546 },
1547 'params': {
1548 'skip_download': True,
1549 },
eb6793ba 1550 'skip': 'This video is not available.',
022a5d66 1551 },
12afdc2a
S
1552 {
1553 # YouTube Red video with episode data
1554 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1555 'info_dict': {
1556 'id': 'iqKdEhx-dD4',
1557 'ext': 'mp4',
1558 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1559 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1560 'duration': 2085,
12afdc2a
S
1561 'upload_date': '20170118',
1562 'uploader': 'Vsauce',
1563 'uploader_id': 'Vsauce',
1564 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1565 'series': 'Mind Field',
1566 'season_number': 1,
1567 'episode_number': 1,
1568 },
1569 'params': {
1570 'skip_download': True,
1571 },
1572 'expected_warnings': [
1573 'Skipping DASH manifest',
1574 ],
1575 },
c7121fa7
S
1576 {
1577 # The following content has been identified by the YouTube community
1578 # as inappropriate or offensive to some audiences.
1579 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1580 'info_dict': {
1581 'id': '6SJNVb0GnPI',
1582 'ext': 'mp4',
1583 'title': 'Race Differences in Intelligence',
1584 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1585 'duration': 965,
1586 'upload_date': '20140124',
1587 'uploader': 'New Century Foundation',
1588 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1589 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1590 },
1591 'params': {
1592 'skip_download': True,
1593 },
545cc85d 1594 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1595 },
022a5d66
S
1596 {
1597 # itag 212
1598 'url': '1t24XAntNCY',
1599 'only_matching': True,
fd5c4aab
S
1600 },
1601 {
1602 # geo restricted to JP
1603 'url': 'sJL6WA-aGkQ',
1604 'only_matching': True,
1605 },
cd5a74a2
S
1606 {
1607 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1608 'only_matching': True,
1609 },
bc2ca1bb 1610 {
1611 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1612 'only_matching': True,
1613 },
1614 {
1615 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1616 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1617 'only_matching': True,
1618 },
825cd268
RA
1619 {
1620 # DRM protected
1621 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1622 'only_matching': True,
4fe54c12
S
1623 },
1624 {
1625 # Video with unsupported adaptive stream type formats
1626 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1627 'info_dict': {
1628 'id': 'Z4Vy8R84T1U',
1629 'ext': 'mp4',
1630 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1631 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1632 'duration': 433,
1633 'upload_date': '20130923',
1634 'uploader': 'Amelia Putri Harwita',
1635 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1636 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1637 'formats': 'maxcount:10',
1638 },
1639 'params': {
1640 'skip_download': True,
1641 'youtube_include_dash_manifest': False,
1642 },
5429d6a9 1643 'skip': 'not actual anymore',
5caabd3c 1644 },
1645 {
822b9d9c 1646 # Youtube Music Auto-generated description
5caabd3c 1647 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1648 'info_dict': {
1649 'id': 'MgNrAu2pzNs',
1650 'ext': 'mp4',
1651 'title': 'Voyeur Girl',
1652 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1653 'upload_date': '20190312',
5429d6a9
S
1654 'uploader': 'Stephen - Topic',
1655 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1656 'artist': 'Stephen',
1657 'track': 'Voyeur Girl',
1658 'album': 'it\'s too much love to know my dear',
1659 'release_date': '20190313',
1660 'release_year': 2019,
1661 },
1662 'params': {
1663 'skip_download': True,
1664 },
1665 },
66b48727
RA
1666 {
1667 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1668 'only_matching': True,
1669 },
011e75e6
S
1670 {
1671 # invalid -> valid video id redirection
1672 'url': 'DJztXj2GPfl',
1673 'info_dict': {
1674 'id': 'DJztXj2GPfk',
1675 'ext': 'mp4',
1676 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1677 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1678 'upload_date': '20090125',
1679 'uploader': 'Prochorowka',
1680 'uploader_id': 'Prochorowka',
1681 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1682 'artist': 'Panjabi MC',
1683 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1684 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1685 },
1686 'params': {
1687 'skip_download': True,
1688 },
545cc85d 1689 'skip': 'Video unavailable',
ea74e00b
DP
1690 },
1691 {
1692 # empty description results in an empty string
1693 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1694 'info_dict': {
1695 'id': 'x41yOUIvK2k',
1696 'ext': 'mp4',
1697 'title': 'IMG 3456',
1698 'description': '',
1699 'upload_date': '20170613',
1700 'uploader_id': 'ElevageOrVert',
1701 'uploader': 'ElevageOrVert',
1702 },
1703 'params': {
1704 'skip_download': True,
1705 },
1706 },
a0566bbf 1707 {
29f7c58a 1708 # with '};' inside yt initial data (see [1])
1709 # see [2] for an example with '};' inside ytInitialPlayerResponse
1710 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1711 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1712 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1713 'info_dict': {
1714 'id': 'CHqg6qOn4no',
1715 'ext': 'mp4',
1716 'title': 'Part 77 Sort a list of simple types in c#',
1717 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1718 'upload_date': '20130831',
1719 'uploader_id': 'kudvenkat',
1720 'uploader': 'kudvenkat',
1721 },
1722 'params': {
1723 'skip_download': True,
1724 },
1725 },
29f7c58a 1726 {
1727 # another example of '};' in ytInitialData
1728 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1729 'only_matching': True,
1730 },
1731 {
1732 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1733 'only_matching': True,
1734 },
545cc85d 1735 {
cc2db878 1736 # https://github.com/ytdl-org/youtube-dl/pull/28094
1737 'url': 'OtqTfy26tG0',
1738 'info_dict': {
1739 'id': 'OtqTfy26tG0',
1740 'ext': 'mp4',
1741 'title': 'Burn Out',
1742 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1743 'upload_date': '20141120',
1744 'uploader': 'The Cinematic Orchestra - Topic',
1745 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1746 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1747 'artist': 'The Cinematic Orchestra',
1748 'track': 'Burn Out',
1749 'album': 'Every Day',
1750 'release_data': None,
1751 'release_year': None,
1752 },
1753 'params': {
1754 'skip_download': True,
1755 },
545cc85d 1756 },
bc2ca1bb 1757 {
1758 # controversial video, only works with bpctr when authenticated with cookies
1759 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1760 'only_matching': True,
1761 },
a1a7907b 1762 {
1763 # controversial video, requires bpctr/contentCheckOk
1764 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1765 'info_dict': {
1766 'id': 'SZJvDhaSDnc',
1767 'ext': 'mp4',
1768 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1769 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1770 'uploader': 'CBS This Morning',
11f9be09 1771 'uploader_id': 'CBSThisMorning',
a1a7907b 1772 'upload_date': '20140716',
1773 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1774 }
1775 },
f7ad7160 1776 {
1777 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1778 'url': 'cBvYw8_A0vQ',
1779 'info_dict': {
1780 'id': 'cBvYw8_A0vQ',
1781 'ext': 'mp4',
1782 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1783 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1784 'upload_date': '20201120',
1785 'uploader': 'Walk around Japan',
1786 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1787 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1788 },
1789 'params': {
1790 'skip_download': True,
1791 },
0fb983f6 1792 }, {
1793 # Has multiple audio streams
1794 'url': 'WaOKSUlf4TM',
1795 'only_matching': True
9297939e 1796 }, {
1797 # Requires Premium: has format 141 when requested using YTM url
1798 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1799 'only_matching': True
1800 }, {
120916da 1801 # multiple subtitles with same lang_code
1802 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1803 'only_matching': True,
109dd3b2 1804 }, {
1805 # Force use android client fallback
1806 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1807 'info_dict': {
1808 'id': 'YOelRv7fMxY',
11f9be09 1809 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1810 'ext': '3gp',
1811 'upload_date': '20210624',
1812 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1813 'uploader': 'colinfurze',
11f9be09 1814 'uploader_id': 'colinfurze',
109dd3b2 1815 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1816 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1817 },
1818 'params': {
1819 'format': '17', # 3gp format available on android
1820 'extractor_args': {'youtube': {'player_client': ['android']}},
1821 },
120916da 1822 },
109dd3b2 1823 {
1824 # Skip download of additional client configs (remix client config in this case)
1825 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1826 'only_matching': True,
1827 'params': {
1828 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1829 },
1830 }
2eb88d95
PH
1831 ]
1832
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is decided by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1842
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create two empty caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _player_cache maps (player_url, signature shape) -> signature function
    # (filled by _decrypt_signature); _code_cache maps player id -> player
    # JS source (filled by _load_player).
    self._player_cache = dict()
    self._code_cache = dict()
e0df6211 1847
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return an absolute URL of the JS player, or None if none is found.

    ``ytcfg['PLAYER_JS_URL']`` is preferred; when it is missing and a
    webpage is given, the page is searched for a ``PLAYER_JS_URL``/``jsUrl``
    JSON entry instead.  Scheme-relative and site-relative results are made
    absolute.
    """
    url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if webpage and not url:
        url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not url:
        return None

    if url.startswith('//'):
        # Scheme-relative URL
        return 'https:' + url
    if re.match(r'https?://', url):
        # Already absolute
        return url
    return compat_urlparse.urljoin('https://www.youtube.com', url)
1862
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # Only the length of each dot-separated part is kept, e.g. 'ab.cde' -> '2.3'
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1866
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id captured from *player_url*.

    The patterns in ``cls._PLAYER_INFO_RE`` are tried in order and the ``id``
    group of the first one that matches is returned.

    Raises ExtractorError when no pattern matches.
    """
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    found = next((m for m in attempts if m), None)
    if found is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return found.group('id')
e40c758c 1876
109dd3b2 1877 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1878 player_id = self._extract_player_info(player_url)
1879 if player_id not in self._code_cache:
1880 self._code_cache[player_id] = self._download_webpage(
1881 player_url, video_id, fatal=fatal,
1882 note='Downloading player ' + player_id,
1883 errnote='Download of %s failed' % player_url)
1884 return player_id in self._code_cache
1885
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature string.

    The function is looked up in the 'youtube-sigfuncs' cache first, where
    it is stored as a list of source indices.  On a cache miss it is parsed
    from the player JS, and its index list is stored in the cache before
    the parsed function is returned.  Returns None if the player could not
    be loaded.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cached_indices = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_indices is not None:
        return lambda s: ''.join(s[i] for i in cached_indices)

    if not self._load_player(video_id, player_url):
        return None

    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Run the function on a probe whose i-th character has code point i, so
    # the output's code points are the source indices to cache.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    indices = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, indices)
    return sig_func
83799698 1908
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function *func*.

    *func* is applied to a probe string whose i-th character has code point
    i, so the code points of the result tell which input position each
    output character came from.  Those positions are rendered as index and
    slice expressions on ``s`` joined with ``+`` and shown via ``to_screen``.
    """
    def gen_sig_code(idxs):
        def _genslice(first, last, stride):
            head = str(first) if first != 0 else ''
            stop = last + stride
            tail = ':' if stop < 0 else ':%d' % stop
            stride_part = (':%d' % stride) if stride != 1 else ''
            return 's[%s%s%s]' % (head, tail, stride_part)

        stride = None
        # Quelch pyflakes warnings - run_start will be set when stride is set
        run_start = '(Never used)'
        for cur, before in zip(idxs[1:], idxs[:-1]):
            delta = cur - before
            if stride is not None:
                # Inside a run: emit it as a slice once the spacing changes
                if delta != stride:
                    yield _genslice(run_start, before, stride)
                    stride = None
            elif delta in (-1, 1):
                # A run of consecutive indices starts at `before`
                stride, run_start = delta, before
            else:
                yield 's[%d]' % before
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield _genslice(run_start, cur, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    indices = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(indices))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    template = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n')
    code = template % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1947
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a Python callable wrapping the signature function in *jscode*.

    The function's name is found by trying the patterns below in order; the
    function is then extracted with JSInterpreter.  The returned callable
    takes the signature string and passes it as the single JS argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # NOTE(review): the next pattern is identical to the one above
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        return initial_function([s])

    return run_signature_function
1971
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""

    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # Signature functions are cached per player URL and signature shape
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        decrypt = self._player_cache[cache_key]
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(decrypt, s)
        return decrypt(s)
    except Exception as e:
        # Any failure is re-raised as an ExtractorError carrying the traceback
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1993
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ``ytcfg['STS']`` is used when it is set to a non-zero integer; otherwise
    the value is searched for in the player JS.  Without a player URL this
    raises ExtractorError if *fatal*, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    if not self._load_player(video_id, player_url, fatal=fatal):
        return sts

    code = self._code_cache[self._extract_player_info(player_url)]
    found = self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal)
    return int_or_none(found)
2018
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL so the video is marked as watched.

    The URL is the first valid ``videostatsPlaybackUrl`` base URL found in
    *player_responses*, with ``ver`` and a random ``cpn`` added to its
    query.  If no such URL exists, a warning is reported and nothing is
    requested; a failing request is not fatal.
    """
    playback_url = traverse_obj(
        player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # random.randint(0, 256) includes 256, which skewed `& 63` towards index
    # 0; picking directly from the 64-character alphabet is uniform.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2044
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return a list of YouTube embed URLs and video ids found in *webpage*.

    Three sources are scanned, in this order: embedded players (URLs),
    lazyYT elements (``data-youtube-id`` values) and the Wordpress
    "YouTube Video Importer" plugin (``data-video_id`` values).
    """
    # Embedded YouTube player
    embed_matches = re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)
    found = [unescapeHTML(m.group('url')) for m in embed_matches]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    found.extend(unescapeHTML(lazy_id) for lazy_id in lazy_ids)

    # Wordpress "YouTube Video Importer" plugin
    plugin_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the video id is the last group
    found.extend(groups[-1] for groups in plugin_matches)

    return found
2076
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``_extract_urls``, or None."""
    candidates = YoutubeIE._extract_urls(webpage)
    if not candidates:
        return None
    return candidates[0]
2081
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. group 2 of ``cls._VALID_URL`` matched on *url*.

    Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
2089
def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the player bar section of *data*.

    The start time of each chapter is ``timeRangeStartMillis`` scaled by
    1000; the title is the ``simpleText`` of its ``title``.
    """
    def start_time_of(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_time_of,
        chapter_title=title_of,
        duration=duration)
2104
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build the chapter list from the engagement panels of *data*.

    Each macro-markers list found in the panels is tried in turn; the first
    one that produces a non-empty chapter list wins.  Returns [] if none
    does.
    """
    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2120
2121 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2122 chapters = []
7c365c21 2123 last_chapter = {'start_time': 0}
2124 for idx, chapter in enumerate(chapter_list or []):
2125 title = chapter_title(chapter)
84213ea8
S
2126 start_time = chapter_time(chapter)
2127 if start_time is None:
2128 continue
7c365c21 2129 last_chapter['end_time'] = start_time
2130 if start_time < last_chapter['start_time']:
2131 if idx == 1:
2132 chapters.pop()
2133 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2134 else:
2135 self.report_warning(f'Invalid start time for chapter "{title}"')
2136 continue
2137 last_chapter = {'start_time': start_time, 'title': title}
2138 chapters.append(last_chapter)
2139 last_chapter['end_time'] = duration
84213ea8
S
2140 return chapters
2141
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find a JSON blob in *webpage* with *regex* and return it parsed.

    *regex* followed by ``_YT_INITIAL_BOUNDARY_RE`` is tried first, then
    *regex* alone.  When nothing matches, '{}' is parsed instead; JSON
    parsing itself is non-fatal.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 2146
d92f5d5a 2147 @staticmethod
2148 def parse_time_text(time_text):
2149 """
2150 Parse the comment time text
2151 time_text is in the format 'X units ago (edited)'
2152 """
2153 time_text_split = time_text.split(' ')
2154 if len(time_text_split) >= 3:
da503b7a 2155 try:
2156 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2157 except ValueError:
2158 return None
d92f5d5a 2159
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no ``commentId``.  ``timestamp`` is
    None when the published time text cannot be parsed; ``parent`` defaults
    to 'root'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    time_text_dt = self.parse_time_text(time_text)
    # Bind timestamp up front: without this, unparsable time text left the
    # name unbound and building the result raised UnboundLocalError.
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2197
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """Generate comments by following continuations from the 'next' endpoint.

    Yields comment dicts (see _extract_comment), including replies, which
    are fetched by recursing with *parent* set to the comment id.  For a
    top-level call the estimated total comment count, when found in the
    header, is yielded as a plain int.

    *comment_counts* is a shared mutable list
    [comments so far, estimated total, current thread number]; it is
    created on the top-level call and passed down to reply calls.
    """

    def extract_header(contents):
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type.  It used to sit inside the getter
            # tuple, where it acted as a second getter and returned a copy
            # of the source dict whenever the real lookup failed.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # Count from 1 so that 0 really means "nothing received";
                # with 0-based counting a page holding exactly one entry
                # was mistaken for an empty one and paging stopped early.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items), 1):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                # (counted from 1, see the note in the list branch above)
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {}), 1):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2368
2d6659b9 2369 @staticmethod
2370 def _generate_comment_continuation(video_id):
2371 """
2372 Generates initial comment section continuation token from given video id
2373 """
2374 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2375 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2376 new_continuation_intlist = list(itertools.chain.from_iterable(
2377 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2378 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2379
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Collects the comments found in `contents` until the `max_comments`
    extractor argument is reached (no limit if it is not a number) or the
    user interrupts. Returns a dict with `comments` and `comment_count`.
    """
    # Keys of an entry in `contents` whose value is a comment section renderer
    comment_section_keys = ('itemSectionRenderer',)

    def _iter_comment_items(sections):
        # Anything but a list of entries yields nothing
        if not isinstance(sections, list):
            return
        for section in sections:
            for renderer_key, renderer in section.items():
                if renderer_key in comment_section_keys:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    # Only the first known renderer of each entry is processed
                    break

    collected = []
    expected_total = 0
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    try:
        for item in _iter_comment_items(contents):
            if len(collected) >= limit:
                break
            if isinstance(item, int):
                # An int item is the estimated total, not a comment
                expected_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        # Keep whatever was downloaded so far
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(collected), expected_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
2413
109dd3b2 2414 @staticmethod
2415 def _generate_player_context(sts=None):
2416 context = {
2417 'html5Preference': 'HTML5_PREF_WANTS',
2418 }
2419 if sts is not None:
2420 context['signatureTimestamp'] = sts
2421 return {
2422 'playbackContext': {
2423 'contentPlaybackContext': context
a1a7907b 2424 },
2fd226f6 2425 'contentCheckOk': True,
2426 'racyCheckOk': True
109dd3b2 2427 }
2428
11f9be09 2429 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
109dd3b2 2430
11f9be09 2431 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2432 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2433 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
2434 headers = self.generate_api_headers(
2435 player_ytcfg, identity_token, syncid,
2436 default_client=self._YT_CLIENTS[client], session_index=session_index)
9297939e 2437
11f9be09 2438 yt_query = {'videoId': video_id}
2439 yt_query.update(self._generate_player_context(sts))
2440 return self._extract_response(
2441 item_id=video_id, ep='player', query=yt_query,
2442 ytcfg=player_ytcfg, headers=headers, fatal=False,
2443 default_client=self._YT_CLIENTS[client],
2444 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2445 ) or None
2446
def _get_requested_clients(self, url, smuggled_data):
    """
    Work out which player clients to query for the given URL

    Clients come from the `player_client` extractor argument. `all` expands
    to every client of `_YT_CLIENTS` whose name does not start with an
    underscore; unsupported names are skipped with a warning. When nothing
    valid was requested, `android` and `web` are used. For music URLs the
    `_music` variant of each chosen client is added if such a client exists.

    Returns the client names in request order with duplicates removed.
    """
    allowed_clients = [client for client in self._YT_CLIENTS if client[:1] != '_']
    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = ['android', 'web']

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # The variants are built from a snapshot so that the list is not
        # extended while it is being iterated.
        music_clients = [f'{client}_music' for client in requested_clients]
        # Only keep variants that exist: every returned client is later looked
        # up in _YT_CLIENTS, so a made-up name such as "web_embedded_music"
        # would raise KeyError there.
        requested_clients.extend(
            client for client in music_clients if client in self._YT_CLIENTS)

    return orderedSet(requested_clients)
cf7e015f 2465
c0bc527b
M
2466 def _extract_player_ytcfg(self, client, video_id):
2467 url = {
2468 'web_music': 'https://music.youtube.com',
2469 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2470 }.get(client)
2471 if not url:
2472 return {}
2473 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2474 return self.extract_ytcfg(video_id, webpage) or {}
2475
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
    """
    Yield the player response obtained for each of the given clients, in order

    The `web` client reuses the initial player response embedded in
    `webpage` when there is one. When a response reports one of
    `_AGE_GATE_REASONS`, the `<client>_agegate` client is queried next,
    provided it exists in `_YT_CLIENTS` and was not requested already.
    If `web` was not requested, the initial player response is yielded
    last with its `streamingData` set to None.
    """
    initial_pr = self._extract_yt_initial_variable(
        webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
        video_id, 'initial player response') if webpage else None

    # Reversed copy used as a stack: popping returns the clients in their
    # requested order and age-gate clients pushed on it are tried next
    pending = clients[::-1]
    while pending:
        client = pending.pop()
        player_ytcfg = {}
        if client == 'web':
            player_ytcfg = master_ytcfg
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
        if client == 'web_embedded':
            # If we extracted the embed webpage, it'll tell us if we can view the video
            embedded_pr_json = traverse_obj(
                player_ytcfg, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str)
            embedded_pr = self._parse_json(embedded_pr_json or '{}', video_id=video_id)
            embedded_reason = traverse_obj(
                embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
            if embedded_reason in self._AGE_GATE_REASONS:
                self.report_warning(f'Youtube said: {embedded_reason}')
                continue

        if client == 'web' and initial_pr:
            pr = initial_pr
        else:
            pr = self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                identity_token, player_url, initial_pr)
        if pr:
            yield pr

        if traverse_obj(pr, ('playabilityStatus', 'reason')) in self._AGE_GATE_REASONS:
            agegate_client = f'{client}_agegate'
            if agegate_client in self._YT_CLIENTS and agegate_client not in clients:
                pending.append(agegate_client)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr and 'web' not in clients:
        initial_pr['streamingData'] = None
        yield initial_pr
2519
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """
    Yield format dicts built from the given list of streamingData dicts

    The directly listed formats (`formats` and `adaptiveFormats`) come first,
    followed by those formats of the HLS and DASH manifests whose itag was
    not seen before. DASH manifests are not read for live videos, and either
    manifest type can be disabled through the `skip` extractor argument or
    the corresponding `youtube_include_*_manifest` parameter.

    Formats with an encrypted signature are skipped when `player_url` is
    not available.
    """
    itags, stream_ids = [], []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null audioQuality
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            # Remembered so that manifest formats can be ranked by itag/height
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it must be rebuilt from the signature cipher
            sc = compat_parse_qs(fmt.get('signatureCipher') or '')
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            # quality is None when the format carries neither `quality` nor
            # `audioQuality`; it must not break the note in that case
            'format_note': ', '.join(filter(None, (
                audio_track.get('displayName'),
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', '')))),
            'fps': int_or_none(fmt.get('fps')),
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': (audio_track.get('id') or '').split('.')[0],
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is that of the one stream
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    def guess_quality(f):
        # Reuse the quality recorded for the same itag, else for the same height
        for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
            if val in qdict:
                return q(qdict[val])
        return -1

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag in itags:
                    continue
                if itag:
                    f['format_id'] = itag
                    itags.append(itag)
                f['quality'] = guess_quality(f)
                yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag:
                    itags.append(itag)
                f['quality'] = guess_quality(f)
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                yield f
2654
2655 def _real_extract(self, url):
2656 url, smuggled_data = unsmuggle_url(url, {})
2657 video_id = self._match_id(url)
2658
2659 base_url = self.http_scheme() + '//www.youtube.com/'
2660 webpage_url = base_url + 'watch?v=' + video_id
2661 webpage = self._download_webpage(
2662 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2663
2664 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2665 player_url = self._extract_player_url(master_ytcfg, webpage)
2666 identity_token = self._extract_identity_token(webpage, video_id)
2667
2668 player_responses = list(self._extract_player_responses(
2669 self._get_requested_clients(url, smuggled_data),
2670 video_id, webpage, master_ytcfg, player_url, identity_token))
2671
352d63fd 2672 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
11f9be09 2673
2674 playability_statuses = traverse_obj(
2675 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2676
2677 trailer_video_id = get_first(
2678 playability_statuses,
2679 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2680 expected_type=str)
2681 if trailer_video_id:
2682 return self.url_result(
2683 trailer_video_id, self.ie_key(), trailer_video_id)
2684
2685 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2686 if webpage else (lambda x: None))
2687
2688 video_details = traverse_obj(
2689 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2690 microformats = traverse_obj(
2691 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2692 expected_type=dict, default=[])
2693 video_title = (
2694 get_first(video_details, 'title')
2695 or self._get_text(microformats, (..., 'title'))
2696 or search_meta(['og:title', 'twitter:title', 'title']))
2697 video_description = get_first(video_details, 'shortDescription')
2698
2699 if not smuggled_data.get('force_singlefeed', False):
2700 if not self.get_param('noplaylist'):
2701 multifeed_metadata_list = get_first(
2702 player_responses,
2703 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2704 expected_type=str)
2705 if multifeed_metadata_list:
2706 entries = []
2707 feed_ids = []
2708 for feed in multifeed_metadata_list.split(','):
2709 # Unquote should take place before split on comma (,) since textual
2710 # fields may contain comma as well (see
2711 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2712 feed_data = compat_parse_qs(
2713 compat_urllib_parse_unquote_plus(feed))
2714
2715 def feed_entry(name):
2716 return try_get(
2717 feed_data, lambda x: x[name][0], compat_str)
2718
2719 feed_id = feed_entry('id')
2720 if not feed_id:
2721 continue
2722 feed_title = feed_entry('title')
2723 title = video_title
2724 if feed_title:
2725 title += ' (%s)' % feed_title
2726 entries.append({
2727 '_type': 'url_transparent',
2728 'ie_key': 'Youtube',
2729 'url': smuggle_url(
2730 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2731 {'force_singlefeed': True}),
2732 'title': title,
2733 })
2734 feed_ids.append(feed_id)
2735 self.to_screen(
2736 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2737 % (', '.join(feed_ids), video_id))
2738 return self.playlist_result(
2739 entries, video_id, video_title, video_description)
2740 else:
2741 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2742
7ea65411 2743 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
11f9be09 2744 is_live = get_first(video_details, 'isLive')
7ea65411 2745 if is_live is None:
2746 is_live = get_first(live_broadcast_details, 'isLiveNow')
11f9be09 2747
2748 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2749 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
bf1317d2 2750
545cc85d 2751 if not formats:
11f9be09 2752 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
b7da73eb 2753 self.raise_no_formats(
545cc85d 2754 'This video is DRM protected.', expected=True)
11f9be09 2755 pemr = get_first(
2756 playability_statuses,
2757 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2758 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2759 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 2760 if subreason:
545cc85d 2761 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 2762 countries = get_first(microformats, 'availableCountries')
545cc85d 2763 if not countries:
2764 regions_allowed = search_meta('regionsAllowed')
2765 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2766 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 2767 reason += f'. {subreason}'
545cc85d 2768 if reason:
b7da73eb 2769 self.raise_no_formats(reason, expected=True)
bf1317d2 2770
11f9be09 2771 for f in formats:
2a9c6dcd 2772 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
11f9be09 2773 f['source_preference'] = -10
2a9c6dcd 2774 note = f.get('format_note')
2775 f['format_note'] = f'{note} (throttled)' if note else '(throttled)'
11f9be09 2776
2a9c6dcd 2777 # Source is given priority since formats that throttle are given lower source_preference
2778 # When throttling issue is fully fixed, remove this
2779 self._sort_formats(formats, ('quality', 'height', 'fps', 'source'))
bf1317d2 2780
11f9be09 2781 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 2782 if not keywords and webpage:
2783 keywords = [
2784 unescapeHTML(m.group('content'))
2785 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2786 for keyword in keywords:
2787 if keyword.startswith('yt:stretch='):
201c1459 2788 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2789 if mobj:
2790 # NB: float is intentional for forcing float division
2791 w, h = (float(v) for v in mobj.groups())
2792 if w > 0 and h > 0:
2793 ratio = w / h
2794 for f in formats:
2795 if f.get('vcodec') != 'none':
2796 f['stretched_ratio'] = ratio
2797 break
6449cd80 2798
545cc85d 2799 thumbnails = []
11f9be09 2800 thumbnail_dicts = traverse_obj(
2801 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2802 expected_type=dict, default=[])
2803 for thumbnail in thumbnail_dicts:
2804 thumbnail_url = thumbnail.get('url')
2805 if not thumbnail_url:
2806 continue
2807 # Sometimes youtube gives a wrong thumbnail URL. See:
2808 # https://github.com/yt-dlp/yt-dlp/issues/233
2809 # https://github.com/ytdl-org/youtube-dl/issues/28023
2810 if 'maxresdefault' in thumbnail_url:
2811 thumbnail_url = thumbnail_url.split('?')[0]
2812 thumbnails.append({
2813 'url': thumbnail_url,
2814 'height': int_or_none(thumbnail.get('height')),
2815 'width': int_or_none(thumbnail.get('width')),
2816 })
ff2751ac 2817 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2818 if thumbnail_url:
2819 thumbnails.append({
2820 'url': thumbnail_url,
ff2751ac 2821 })
0ba692ac 2822 # The best resolution thumbnails sometimes does not appear in the webpage
2823 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 2824 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2825 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
245524e6 2826 # TODO: Test them also? - For some videos, even these don't exist
cca80fe6 2827 guaranteed_thumbnail_names = [
2828 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2829 'mqdefault', 'mq1', 'mq2', 'mq3',
2830 'default', '1', '2', '3'
2831 ]
2832 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2833 n_thumbnail_names = len(thumbnail_names)
2834
0ba692ac 2835 thumbnails.extend({
2836 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2837 video_id=video_id, name=name, ext=ext,
2838 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 2839 '_test_url': name in hq_thumbnail_names,
2840 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 2841 for thumb in thumbnails:
cca80fe6 2842 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 2843 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 2844 self._remove_duplicate_formats(thumbnails)
545cc85d 2845
7ea65411 2846 category = get_first(microformats, 'category') or search_meta('genre')
2847 channel_id = str_or_none(
2848 get_first(video_details, 'channelId')
2849 or get_first(microformats, 'externalChannelId')
2850 or search_meta('channelId'))
2851 duration = int_or_none(
2852 get_first(video_details, 'lengthSeconds')
2853 or get_first(microformats, 'lengthSeconds')
2854 or parse_duration(search_meta('duration'))) or None
2855 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2856
2857 live_content = get_first(video_details, 'isLiveContent')
2858 is_upcoming = get_first(video_details, 'isUpcoming')
2859 if is_live is None:
2860 if is_upcoming or live_content is False:
2861 is_live = False
2862 if is_upcoming is None and (live_content or is_live):
2863 is_upcoming = False
2864 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2865 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2866 if not duration and live_endtime and live_starttime:
2867 duration = live_endtime - live_starttime
2868
545cc85d 2869 info = {
2870 'id': video_id,
2871 'title': self._live_title(video_title) if is_live else video_title,
2872 'formats': formats,
2873 'thumbnails': thumbnails,
2874 'description': video_description,
2875 'upload_date': unified_strdate(
11f9be09 2876 get_first(microformats, 'uploadDate')
545cc85d 2877 or search_meta('uploadDate')),
11f9be09 2878 'uploader': get_first(video_details, 'author'),
545cc85d 2879 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2880 'uploader_url': owner_profile_url,
2881 'channel_id': channel_id,
11f9be09 2882 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
545cc85d 2883 'duration': duration,
2884 'view_count': int_or_none(
11f9be09 2885 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 2886 or search_meta('interactionCount')),
11f9be09 2887 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 2888 'age_limit': 18 if (
11f9be09 2889 get_first(microformats, 'isFamilySafe') is False
545cc85d 2890 or search_meta('isFamilyFriendly') == 'false'
2891 or search_meta('og:restrictions:age') == '18+') else 0,
2892 'webpage_url': webpage_url,
2893 'categories': [category] if category else None,
2894 'tags': keywords,
11f9be09 2895 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 2896 'is_live': is_live,
2897 'was_live': (False if is_live or is_upcoming or live_content is False
2898 else None if is_live is None or is_upcoming is None
2899 else live_content),
2900 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2901 'release_timestamp': live_starttime,
545cc85d 2902 }
b477fc13 2903
3944e7af 2904 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2905 # Converted into dicts to remove duplicates
2906 captions = {
2907 sub.get('baseUrl'): sub
2908 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2909 translation_languages = {
2910 lang.get('languageCode'): lang.get('languageName')
2911 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
545cc85d 2912 subtitles = {}
2913 if pctr:
774d79cc 2914 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2915 lang_subs = container.setdefault(lang_code, [])
545cc85d 2916 for fmt in self._SUBTITLE_FORMATS:
2917 query.update({
2918 'fmt': fmt,
2919 })
2920 lang_subs.append({
2921 'ext': fmt,
2922 'url': update_url_query(base_url, query),
774d79cc 2923 'name': sub_name,
545cc85d 2924 })
7e72694b 2925
3944e7af 2926 for base_url, caption_track in captions.items():
545cc85d 2927 if not base_url:
2928 continue
2929 if caption_track.get('kind') != 'asr':
120916da 2930 lang_code = (
2931 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2932 or caption_track.get('languageCode'))
545cc85d 2933 if not lang_code:
2934 continue
2935 process_language(
774d79cc 2936 subtitles, base_url, lang_code,
3944e7af 2937 traverse_obj(caption_track, ('name', 'simpleText')),
774d79cc 2938 {})
545cc85d 2939 continue
2940 automatic_captions = {}
3944e7af 2941 for trans_code, trans_name in translation_languages.items():
2942 if not trans_code:
545cc85d 2943 continue
2944 process_language(
3944e7af 2945 automatic_captions, base_url, trans_code,
2946 self._get_text(trans_name, max_runs=1),
2947 {'tlang': trans_code})
545cc85d 2948 info['automatic_captions'] = automatic_captions
2949 info['subtitles'] = subtitles
7e72694b 2950
545cc85d 2951 parsed_url = compat_urllib_parse_urlparse(url)
2952 for component in [parsed_url.fragment, parsed_url.query]:
2953 query = compat_parse_qs(component)
2954 for k, v in query.items():
2955 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2956 d_k += '_time'
2957 if d_k not in info and k in s_ks:
2958 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2959
2960 # Youtube Music Auto-generated description
822b9d9c 2961 if video_description:
38d70284 2962 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2963 if mobj:
822b9d9c
RA
2964 release_year = mobj.group('release_year')
2965 release_date = mobj.group('release_date')
2966 if release_date:
2967 release_date = release_date.replace('-', '')
2968 if not release_year:
545cc85d 2969 release_year = release_date[:4]
2970 info.update({
2971 'album': mobj.group('album'.strip()),
2972 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2973 'track': mobj.group('track').strip(),
2974 'release_date': release_date,
cc2db878 2975 'release_year': int_or_none(release_year),
545cc85d 2976 })
7e72694b 2977
545cc85d 2978 initial_data = None
2979 if webpage:
2980 initial_data = self._extract_yt_initial_variable(
2981 webpage, self._YT_INITIAL_DATA_RE, video_id,
2982 'yt initial data')
2983 if not initial_data:
11f9be09 2984 headers = self.generate_api_headers(
2985 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
2986 session_index=self._extract_session_index(master_ytcfg))
2987
109dd3b2 2988 initial_data = self._extract_response(
2989 item_id=video_id, ep='next', fatal=False,
11f9be09 2990 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
109dd3b2 2991 note='Downloading initial data API JSON')
545cc85d 2992
c60ee3a2 2993 try:
2994 # This will error if there is no livechat
2995 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2996 info['subtitles']['live_chat'] = [{
2997 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2998 'video_id': video_id,
2999 'ext': 'json',
f6745c49 3000 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 3001 }]
3002 except (KeyError, IndexError, TypeError):
3003 pass
545cc85d 3004
3005 if initial_data:
7c365c21 3006 info['chapters'] = (
3007 self._extract_chapters_from_json(initial_data, duration)
3008 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3009 or None)
545cc85d 3010
3011 contents = try_get(
3012 initial_data,
3013 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3014 list) or []
3015 for content in contents:
3016 vpir = content.get('videoPrimaryInfoRenderer')
3017 if vpir:
3018 stl = vpir.get('superTitleLink')
3019 if stl:
fe93e2c4 3020 stl = self._get_text(stl)
545cc85d 3021 if try_get(
3022 vpir,
3023 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3024 info['location'] = stl
3025 else:
3026 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3027 if mobj:
3028 info.update({
3029 'series': mobj.group(1),
3030 'season_number': int(mobj.group(2)),
3031 'episode_number': int(mobj.group(3)),
3032 })
3033 for tlb in (try_get(
3034 vpir,
3035 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3036 list) or []):
3037 tbr = tlb.get('toggleButtonRenderer') or {}
3038 for getter, regex in [(
3039 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3040 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3041 lambda x: x['accessibility'],
3042 lambda x: x['accessibilityData']['accessibilityData'],
3043 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3044 label = (try_get(tbr, getter, dict) or {}).get('label')
3045 if label:
3046 mobj = re.match(regex, label)
3047 if mobj:
3048 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3049 break
3050 sbr_tooltip = try_get(
3051 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3052 if sbr_tooltip:
3053 like_count, dislike_count = sbr_tooltip.split(' / ')
3054 info.update({
3055 'like_count': str_to_int(like_count),
3056 'dislike_count': str_to_int(dislike_count),
3057 })
3058 vsir = content.get('videoSecondaryInfoRenderer')
3059 if vsir:
052e1350 3060 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
545cc85d 3061 rows = try_get(
3062 vsir,
3063 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3064 list) or []
3065 multiple_songs = False
3066 for row in rows:
3067 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3068 multiple_songs = True
3069 break
3070 for row in rows:
3071 mrr = row.get('metadataRowRenderer') or {}
3072 mrr_title = mrr.get('title')
3073 if not mrr_title:
3074 continue
052e1350 3075 mrr_title = self._get_text(mrr, 'title')
3076 mrr_contents_text = self._get_text(mrr, ('contents', 0))
545cc85d 3077 if mrr_title == 'License':
3078 info['license'] = mrr_contents_text
3079 elif not multiple_songs:
3080 if mrr_title == 'Album':
3081 info['album'] = mrr_contents_text
3082 elif mrr_title == 'Artist':
3083 info['artist'] = mrr_contents_text
3084 elif mrr_title == 'Song':
3085 info['track'] = mrr_contents_text
3086
3087 fallbacks = {
3088 'channel': 'uploader',
3089 'channel_id': 'uploader_id',
3090 'channel_url': 'uploader_url',
3091 }
3092 for to, frm in fallbacks.items():
3093 if not info.get(to):
3094 info[to] = info.get(frm)
3095
3096 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3097 v = info.get(s_k)
3098 if v:
3099 info[d_k] = v
b84071c0 3100
11f9be09 3101 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3102 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3103 is_membersonly = None
b28f8d24 3104 is_premium = None
c224251a
M
3105 if initial_data and is_private is not None:
3106 is_membersonly = False
b28f8d24 3107 is_premium = False
47193e02 3108 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3109 badge_labels = set()
3110 for content in contents:
3111 if not isinstance(content, dict):
3112 continue
3113 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3114 for badge_label in badge_labels:
3115 if badge_label.lower() == 'members only':
3116 is_membersonly = True
3117 elif badge_label.lower() == 'premium':
3118 is_premium = True
3119 elif badge_label.lower() == 'unlisted':
3120 is_unlisted = True
c224251a 3121
c224251a
M
3122 info['availability'] = self._availability(
3123 is_private=is_private,
b28f8d24 3124 needs_premium=is_premium,
c224251a
M
3125 needs_subscription=is_membersonly,
3126 needs_auth=info['age_limit'] >= 18,
3127 is_unlisted=None if is_private is None else is_unlisted)
3128
06167fbb 3129 # get xsrf for annotations or comments
a06916d9 3130 get_annotations = self.get_param('writeannotations', False)
3131 get_comments = self.get_param('getcomments', False)
06167fbb 3132 if get_annotations or get_comments:
29f7c58a 3133 xsrf_token = None
11f9be09 3134 if master_ytcfg:
3135 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
29f7c58a 3136 if not xsrf_token:
3137 xsrf_token = self._search_regex(
3138 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 3139 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 3140
3141 # annotations
06167fbb 3142 if get_annotations:
11f9be09 3143 invideo_url = get_first(
3144 player_responses,
3145 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3146 expected_type=str)
64b6a4e9 3147 if xsrf_token and invideo_url:
29f7c58a 3148 xsrf_field_name = None
11f9be09 3149 if master_ytcfg:
3150 xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
29f7c58a 3151 if not xsrf_field_name:
3152 xsrf_field_name = self._search_regex(
3153 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 3154 webpage, 'xsrf field name',
29f7c58a 3155 group='xsrf_field_name', default='session_token')
8a784c74 3156 info['annotations'] = self._download_webpage(
64b6a4e9
RA
3157 self._proto_relative_url(invideo_url),
3158 video_id, note='Downloading annotations',
3159 errnote='Unable to download video annotations', fatal=False,
3160 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 3161
277d6ff5 3162 if get_comments:
11f9be09 3163 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3164
11f9be09 3165 self.mark_watched(video_id, player_responses)
d77ab8e2 3166
545cc85d 3167 return info
c5e8d7af 3168
5f6a1245 3169
8bdd16b4 3170class YoutubeTabIE(YoutubeBaseInfoExtractor):
# Extractor identity.
IE_NAME = 'youtube:tab'
IE_DESC = 'YouTube.com tab'

# Verbose-mode pattern (whitespace inside it is ignored by the regex engine).
# After the host, the path is one of:
#   * channel/, c/, user/ or browse/      -> captured as "channel_type"
#   * feed/, hashtag/, or a playlist/watch URL carrying list=
#                                          -> captured as "not_channel"
#   * a bare name that is not one of the reserved names interpolated below
# What follows is captured as "id".
_VALID_URL = r'''(?x)
                https?://
                    (?:\w+\.)?
                    (?:
                        youtube(?:kids)?\.com|
                        invidio\.us
                    )/
                    (?:
                        (?P<channel_type>channel|c|user|browse)/|
                        (?P<not_channel>
                            feed/|hashtag/|
                            (?:playlist|watch)\?.*?\blist=
                        )|
                        (?!(?:%s)\b)  # Direct URLs
                    )
                    (?P<id>[^/?\#&]+)
                ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3190
81127aa5 3191 _TESTS = [{
da692b79 3192 'note': 'playlists, multipage',
8bdd16b4 3193 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3194 'playlist_mincount': 94,
3195 'info_dict': {
3196 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3197 'title': 'Игорь Клейнер - Playlists',
3198 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3199 'uploader': 'Игорь Клейнер',
3200 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3201 },
3202 }, {
da692b79 3203 'note': 'playlists, multipage, different order',
8bdd16b4 3204 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3205 'playlist_mincount': 94,
3206 'info_dict': {
3207 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3208 'title': 'Игорь Клейнер - Playlists',
3209 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3210 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3211 'uploader': 'Игорь Клейнер',
8bdd16b4 3212 },
201c1459 3213 }, {
da692b79 3214 'note': 'playlists, series',
201c1459 3215 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3216 'playlist_mincount': 5,
3217 'info_dict': {
3218 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3219 'title': '3Blue1Brown - Playlists',
3220 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3221 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3222 'uploader': '3Blue1Brown',
201c1459 3223 },
8bdd16b4 3224 }, {
da692b79 3225 'note': 'playlists, singlepage',
8bdd16b4 3226 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3227 'playlist_mincount': 4,
3228 'info_dict': {
3229 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3230 'title': 'ThirstForScience - Playlists',
3231 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3232 'uploader': 'ThirstForScience',
3233 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3234 }
3235 }, {
3236 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3237 'only_matching': True,
3238 }, {
da692b79 3239 'note': 'basic, single video playlist',
0e30a7b9 3240 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3241 'info_dict': {
0e30a7b9 3242 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3243 'uploader': 'Sergey M.',
3244 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3245 'title': 'youtube-dl public playlist',
81127aa5 3246 },
0e30a7b9 3247 'playlist_count': 1,
9291475f 3248 }, {
da692b79 3249 'note': 'empty playlist',
0e30a7b9 3250 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3251 'info_dict': {
0e30a7b9 3252 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3253 'uploader': 'Sergey M.',
3254 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3255 'title': 'youtube-dl empty playlist',
9291475f
PH
3256 },
3257 'playlist_count': 0,
3258 }, {
da692b79 3259 'note': 'Home tab',
8bdd16b4 3260 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3261 'info_dict': {
8bdd16b4 3262 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3263 'title': 'lex will - Home',
3264 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3265 'uploader': 'lex will',
3266 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3267 },
8bdd16b4 3268 'playlist_mincount': 2,
9291475f 3269 }, {
da692b79 3270 'note': 'Videos tab',
8bdd16b4 3271 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3272 'info_dict': {
8bdd16b4 3273 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3274 'title': 'lex will - Videos',
3275 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3276 'uploader': 'lex will',
3277 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3278 },
8bdd16b4 3279 'playlist_mincount': 975,
9291475f 3280 }, {
da692b79 3281 'note': 'Videos tab, sorted by popular',
8bdd16b4 3282 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3283 'info_dict': {
8bdd16b4 3284 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3285 'title': 'lex will - Videos',
3286 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3287 'uploader': 'lex will',
3288 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3289 },
8bdd16b4 3290 'playlist_mincount': 199,
9291475f 3291 }, {
da692b79 3292 'note': 'Playlists tab',
8bdd16b4 3293 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3294 'info_dict': {
8bdd16b4 3295 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3296 'title': 'lex will - Playlists',
3297 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3298 'uploader': 'lex will',
3299 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3300 },
8bdd16b4 3301 'playlist_mincount': 17,
ac7553d0 3302 }, {
da692b79 3303 'note': 'Community tab',
8bdd16b4 3304 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3305 'info_dict': {
8bdd16b4 3306 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3307 'title': 'lex will - Community',
3308 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3309 'uploader': 'lex will',
3310 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3311 },
3312 'playlist_mincount': 18,
87dadd45 3313 }, {
da692b79 3314 'note': 'Channels tab',
8bdd16b4 3315 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3316 'info_dict': {
8bdd16b4 3317 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3318 'title': 'lex will - Channels',
3319 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3320 'uploader': 'lex will',
3321 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3322 },
deaec5af 3323 'playlist_mincount': 12,
cd684175 3324 }, {
3325 'note': 'Search tab',
3326 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3327 'playlist_mincount': 40,
3328 'info_dict': {
3329 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3330 'title': '3Blue1Brown - Search - linear algebra',
3331 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3332 'uploader': '3Blue1Brown',
3333 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3334 },
6b08cdf6 3335 }, {
a0566bbf 3336 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3337 'only_matching': True,
3338 }, {
a0566bbf 3339 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3340 'only_matching': True,
3341 }, {
a0566bbf 3342 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3343 'only_matching': True,
3344 }, {
3345 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3346 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3347 'info_dict': {
3348 'title': '29C3: Not my department',
3349 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3350 'uploader': 'Christiaan008',
3351 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3352 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3353 },
3354 'playlist_count': 96,
3355 }, {
3356 'note': 'Large playlist',
3357 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3358 'info_dict': {
8bdd16b4 3359 'title': 'Uploads from Cauchemar',
3360 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3361 'uploader': 'Cauchemar',
3362 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3363 },
8bdd16b4 3364 'playlist_mincount': 1123,
3365 }, {
da692b79 3366 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3367 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3368 'only_matching': True,
4b7df0d3
JMF
3369 }, {
3370 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3371 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3372 'info_dict': {
acf757f4
PH
3373 'title': 'Uploads from Interstellar Movie',
3374 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3375 'uploader': 'Interstellar Movie',
8bdd16b4 3376 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3377 },
481cc733 3378 'playlist_mincount': 21,
358de58c 3379 }, {
3380 'note': 'Playlist with "show unavailable videos" button',
3381 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3382 'info_dict': {
3383 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3384 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3385 'uploader': 'Phim Siêu Nhân Nhật Bản',
3386 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3387 },
da692b79 3388 'playlist_mincount': 200,
5d342002 3389 }, {
da692b79 3390 'note': 'Playlist with unavailable videos in page 7',
5d342002 3391 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3392 'info_dict': {
3393 'title': 'Uploads from BlankTV',
3394 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3395 'uploader': 'BlankTV',
3396 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3397 },
da692b79 3398 'playlist_mincount': 1000,
8bdd16b4 3399 }, {
da692b79 3400 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3401 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3402 'info_dict': {
3403 'title': 'Data Analysis with Dr Mike Pound',
3404 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3405 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3406 'uploader': 'Computerphile',
deaec5af 3407 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3408 },
3409 'playlist_mincount': 11,
3410 }, {
a0566bbf 3411 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3412 'only_matching': True,
dacb3a86 3413 }, {
da692b79 3414 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3415 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3416 'info_dict': {
3417 'id': 'FqZTN594JQw',
3418 'ext': 'webm',
3419 'title': "Smiley's People 01 detective, Adventure Series, Action",
3420 'uploader': 'STREEM',
3421 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3422 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3423 'upload_date': '20150526',
3424 'license': 'Standard YouTube License',
3425 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3426 'categories': ['People & Blogs'],
3427 'tags': list,
dbdaaa23 3428 'view_count': int,
dacb3a86
S
3429 'like_count': int,
3430 'dislike_count': int,
3431 },
3432 'params': {
3433 'skip_download': True,
3434 },
13a75688 3435 'skip': 'This video is not available.',
dacb3a86 3436 'add_ie': [YoutubeIE.ie_key()],
481cc733 3437 }, {
8bdd16b4 3438 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3439 'only_matching': True,
66b48727 3440 }, {
8bdd16b4 3441 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3442 'only_matching': True,
a0566bbf 3443 }, {
3444 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3445 'info_dict': {
11f9be09 3446 'id': 'FMtPN8yp5LU', # This will keep changing
a0566bbf 3447 'ext': 'mp4',
deaec5af 3448 'title': compat_str,
a0566bbf 3449 'uploader': 'Sky News',
3450 'uploader_id': 'skynews',
3451 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3452 'upload_date': r're:\d{8}',
3453 'description': compat_str,
a0566bbf 3454 'categories': ['News & Politics'],
3455 'tags': list,
3456 'like_count': int,
3457 'dislike_count': int,
3458 },
3459 'params': {
3460 'skip_download': True,
3461 },
da692b79 3462 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3463 }, {
3464 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3465 'info_dict': {
3466 'id': 'a48o2S1cPoo',
3467 'ext': 'mp4',
3468 'title': 'The Young Turks - Live Main Show',
3469 'uploader': 'The Young Turks',
3470 'uploader_id': 'TheYoungTurks',
3471 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3472 'upload_date': '20150715',
3473 'license': 'Standard YouTube License',
3474 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3475 'categories': ['News & Politics'],
3476 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3477 'like_count': int,
3478 'dislike_count': int,
3479 },
3480 'params': {
3481 'skip_download': True,
3482 },
3483 'only_matching': True,
3484 }, {
3485 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3486 'only_matching': True,
3487 }, {
3488 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3489 'only_matching': True,
09f1580e 3490 }, {
3491 'note': 'A channel that is not live. Should raise error',
3492 'url': 'https://www.youtube.com/user/numberphile/live',
3493 'only_matching': True,
3d3dddc9 3494 }, {
3495 'url': 'https://www.youtube.com/feed/trending',
3496 'only_matching': True,
3497 }, {
3d3dddc9 3498 'url': 'https://www.youtube.com/feed/library',
3499 'only_matching': True,
3500 }, {
3d3dddc9 3501 'url': 'https://www.youtube.com/feed/history',
3502 'only_matching': True,
3503 }, {
3d3dddc9 3504 'url': 'https://www.youtube.com/feed/subscriptions',
3505 'only_matching': True,
3506 }, {
3d3dddc9 3507 'url': 'https://www.youtube.com/feed/watch_later',
3508 'only_matching': True,
3509 }, {
da692b79 3510 'note': 'Recommended - redirects to home page',
3d3dddc9 3511 'url': 'https://www.youtube.com/feed/recommended',
3512 'only_matching': True,
29f7c58a 3513 }, {
da692b79 3514 'note': 'inline playlist with not always working continuations',
29f7c58a 3515 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3516 'only_matching': True,
3517 }, {
3518 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3519 'only_matching': True,
3520 }, {
3521 'url': 'https://www.youtube.com/course',
3522 'only_matching': True,
3523 }, {
3524 'url': 'https://www.youtube.com/zsecurity',
3525 'only_matching': True,
3526 }, {
3527 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3528 'only_matching': True,
3529 }, {
3530 'url': 'https://www.youtube.com/TheYoungTurks/live',
3531 'only_matching': True,
39ed931e 3532 }, {
3533 'url': 'https://www.youtube.com/hashtag/cctv9',
3534 'info_dict': {
3535 'id': 'cctv9',
3536 'title': '#cctv9',
3537 },
3538 'playlist_mincount': 350,
201c1459 3539 }, {
3540 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3541 'only_matching': True,
9297939e 3542 }, {
da692b79 3543 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3544 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3545 'only_matching': True
fe03a6cd 3546 }, {
3547 'note': '/browse/ should redirect to /channel/',
3548 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3549 'only_matching': True
3550 }, {
3551 'note': 'VLPL, should redirect to playlist?list=PL...',
3552 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3553 'info_dict': {
3554 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3555 'uploader': 'NoCopyrightSounds',
3556 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3557 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3558 'title': 'NCS Releases',
3559 },
3560 'playlist_mincount': 166,
18db7548 3561 }, {
3562 'note': 'Topic, should redirect to playlist?list=UU...',
3563 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3564 'info_dict': {
3565 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3566 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3567 'title': 'Uploads from Royalty Free Music - Topic',
3568 'uploader': 'Royalty Free Music - Topic',
3569 },
3570 'expected_warnings': [
3571 'A channel/user page was given',
3572 'The URL does not have a videos tab',
3573 ],
3574 'playlist_mincount': 101,
3575 }, {
3576 'note': 'Topic without a UU playlist',
3577 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3578 'info_dict': {
3579 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3580 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3581 },
3582 'expected_warnings': [
3583 'A channel/user page was given',
3584 'The URL does not have a videos tab',
3585 'Falling back to channel URL',
3586 ],
3587 'playlist_mincount': 9,
abcdd12b 3588 }, {
3589 'note': 'Youtube music Album',
3590 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3591 'info_dict': {
3592 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3593 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3594 },
3595 'playlist_count': 50,
47193e02 3596 }, {
3597 'note': 'unlisted single video playlist',
3598 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3599 'info_dict': {
3600 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3601 'uploader': 'colethedj',
3602 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3603 'title': 'yt-dlp unlisted playlist test',
3604 'availability': 'unlisted'
3605 },
3606 'playlist_count': 1,
29f7c58a 3607 }]
3608
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that ``YoutubeIE`` already accepts is declined here; for
    everything else the decision is delegated to the parent class.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3613
3614 def _extract_channel_id(self, webpage):
3615 channel_id = self._html_search_meta(
3616 'channelId', webpage, 'channel id', default=None)
3617 if channel_id:
3618 return channel_id
3619 channel_url = self._html_search_meta(
3620 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3621 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3622 'twitter:app:url:googleplay'), webpage, 'channel url')
3623 return self._search_regex(
3624 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3625 channel_url, 'channel id')
15f6397c 3626
8bdd16b4 3627 @staticmethod
cd7c66cf 3628 def _extract_basic_item_renderer(item):
3629 # Modified from _extract_grid_item_renderer
201c1459 3630 known_basic_renderers = (
3631 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3632 )
3633 for key, renderer in item.items():
201c1459 3634 if not isinstance(renderer, dict):
cd7c66cf 3635 continue
201c1459 3636 elif key in known_basic_renderers:
3637 return renderer
3638 elif key.startswith('grid') and key.endswith('Renderer'):
3639 return renderer
8bdd16b4 3640
def _grid_entries(self, grid_renderer):
    """Yield one result per recognised item of ``grid_renderer['items']``.

    Every item is unwrapped with ``_extract_basic_item_renderer`` and then
    dispatched on the first identifier it carries: ``playlistId``,
    ``videoId``, ``channelId`` or, failing all of those, the URL of its
    navigation endpoint. Items that are not dicts, or that wrap no known
    renderer, are skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        if renderer.get('playlistId'):
            # playlist
            playlist_id = renderer['playlistId']
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % renderer['channelId'],
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support: hand the URL to the first
            # extractor (in this fixed order) that claims it
            endpoint_path = try_get(
                renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str)
            endpoint_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not endpoint_url:
                continue
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE)
                 if ie.suitable(endpoint_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    endpoint_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(endpoint_url),
                    video_title=title)
8bdd16b4 3680
3d3dddc9 3681 def _shelf_entries_from_content(self, shelf_renderer):
3682 content = shelf_renderer.get('content')
3683 if not isinstance(content, dict):
8bdd16b4 3684 return
cd7c66cf 3685 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3686 if renderer:
3687 # TODO: add support for nested playlists so each shelf is processed
3688 # as separate playlist
3689 # TODO: this includes only first N items
3690 for entry in self._grid_entries(renderer):
3691 yield entry
3692 renderer = content.get('horizontalListRenderer')
3693 if renderer:
3694 # TODO
3695 pass
8bdd16b4 3696
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries of a shelf.

    When the shelf has an endpoint URL, a URL result for it is yielded
    first. Entries embedded in the shelf's content are yielded afterwards.
    With *skip_channels* set, a shelf whose URL contains ``/channels?``
    yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are recognised by their URL; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType against
        # WEB_PAGE_TYPE_CHANNEL will not work
        links_to_channels = '/channels?' in shelf_url
        if skip_channels and links_to_channels:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # The shelf may carry no URL (or more than the URL): also extract
    # whatever its content holds
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 3713
8bdd16b4 3714 def _playlist_entries(self, video_list_renderer):
3715 for content in video_list_renderer['contents']:
3716 if not isinstance(content, dict):
3717 continue
3718 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3719 if not isinstance(renderer, dict):
3720 continue
3721 video_id = renderer.get('videoId')
3722 if not video_id:
3723 continue
3724 yield self._extract_video(renderer)
07aeced6 3725
def _rich_entries(self, rich_grid_renderer):
    """Yield the video found at ``content.videoRenderer``, if it has an ID."""
    video = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video.get('videoId'):
        yield self._extract_video(video)
3733
8bdd16b4 3734 def _video_entry(self, video_renderer):
3735 video_id = video_renderer.get('videoId')
3736 if video_id:
3737 return self._extract_video(video_renderer)
dacb3a86 3738
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by one community post.

    In order: the attached video, the attached playlist, then every link
    in the post text that ``YoutubeIE`` accepts. A text link pointing at
    the attached video is not yielded a second time.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post,
        lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'],
        compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not (link and YoutubeIE.suitable(link)):
            continue
        linked_id = YoutubeIE._match_id(link)
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3774
8bdd16b4 3775 def _post_thread_continuation_entries(self, post_thread_continuation):
3776 contents = post_thread_continuation.get('contents')
3777 if not isinstance(contents, list):
3778 return
3779 for content in contents:
3780 renderer = content.get('backstagePostThreadRenderer')
3781 if not isinstance(renderer, dict):
3782 continue
3783 for entry in self._post_thread_entries(renderer):
3784 yield entry
07aeced6 3785
39ed931e 3786 r''' # unused
3787 def _rich_grid_entries(self, contents):
3788 for content in contents:
3789 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3790 if video_renderer:
3791 entry = self._video_entry(video_renderer)
3792 if entry:
3793 yield entry
3794 '''
f4f751af 3795 def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
3462ffa8 3796
70d5c17b 3797 def extract_entries(parent_renderer): # this needs to called again for continuation to work with feeds
3798 contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3799 for content in contents:
3800 if not isinstance(content, dict):
8bdd16b4 3801 continue
70d5c17b 3802 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3462ffa8 3803 if not is_renderer:
70d5c17b 3804 renderer = content.get('richItemRenderer')
3462ffa8 3805 if renderer:
3806 for entry in self._rich_entries(renderer):
3807 yield entry
3808 continuation_list[0] = self._extract_continuation(parent_renderer)
8bdd16b4 3809 continue
3462ffa8 3810 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3811 for isr_content in isr_contents:
3812 if not isinstance(isr_content, dict):
3813 continue
69184e41 3814
3815 known_renderers = {
3816 'playlistVideoListRenderer': self._playlist_entries,
3817 'gridRenderer': self._grid_entries,
3818 'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
3819 'backstagePostThreadRenderer': self._post_thread_entries,
3820 'videoRenderer': lambda x: [self._video_entry(x)],
3821 }
3822 for key, renderer in isr_content.items():
3823 if key not in known_renderers:
3824 continue
3825 for entry in known_renderers[key](renderer):
3826 if entry:
3827 yield entry
3462ffa8 3828 continuation_list[0] = self._extract_continuation(renderer)
69184e41 3829 break
70d5c17b 3830
3462ffa8 3831 if not continuation_list[0]:
3832 continuation_list[0] = self._extract_continuation(is_renderer)
70d5c17b 3833
3834 if not continuation_list[0]:
3835 continuation_list[0] = self._extract_continuation(parent_renderer)
3462ffa8 3836
3837 continuation_list = [None] # Python 2 doesnot support nonlocal
29f7c58a 3838 tab_content = try_get(tab, lambda x: x['content'], dict)
3839 if not tab_content:
3840 return
3462ffa8 3841 parent_renderer = (
29f7c58a 3842 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3843 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
70d5c17b 3844 for entry in extract_entries(parent_renderer):
3845 yield entry
3462ffa8 3846 continuation = continuation_list[0]
fe93e2c4 3847 visitor_data = None
d069eca7 3848
8bdd16b4 3849 for page_num in itertools.count(1):
3850 if not continuation:
3851 break
11f9be09 3852 headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
79360d99 3853 response = self._extract_response(
3854 item_id='%s page %s' % (item_id, page_num),
fe93e2c4 3855 query=continuation, headers=headers, ytcfg=ytcfg,
79360d99 3856 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
a5c56234
M
3857
3858 if not response:
8bdd16b4 3859 break
f4f751af 3860 visitor_data = try_get(
3861 response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data
ebf1b291 3862
69184e41 3863 known_continuation_renderers = {
3864 'playlistVideoListContinuation': self._playlist_entries,
3865 'gridContinuation': self._grid_entries,
3866 'itemSectionContinuation': self._post_thread_continuation_entries,
3867 'sectionListContinuation': extract_entries, # for feeds
3868 }
8bdd16b4 3869 continuation_contents = try_get(
69184e41 3870 response, lambda x: x['continuationContents'], dict) or {}
3871 continuation_renderer = None
3872 for key, value in continuation_contents.items():
3873 if key not in known_continuation_renderers:
3462ffa8 3874 continue
69184e41 3875 continuation_renderer = value
3876 continuation_list = [None]
3877 for entry in known_continuation_renderers[key](continuation_renderer):
3878 yield entry
3879 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3880 break
3881 if continuation_renderer:
3882 continue
c5e8d7af 3883
a1b535bd 3884 known_renderers = {
3885 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3886 'gridVideoRenderer': (self._grid_entries, 'items'),
d61fc646 3887 'gridChannelRenderer': (self._grid_entries, 'items'),
a1b535bd 3888 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
cd7c66cf 3889 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
9ba5705a 3890 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
26fe8ffe 3891 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
a1b535bd 3892 }
cce889b9 3893 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
8bdd16b4 3894 continuation_items = try_get(
cce889b9 3895 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
a1b535bd 3896 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3897 video_items_renderer = None
3898 for key, value in continuation_item.items():
3899 if key not in known_renderers:
8bdd16b4 3900 continue
a1b535bd 3901 video_items_renderer = {known_renderers[key][1]: continuation_items}
9ba5705a 3902 continuation_list = [None]
a1b535bd 3903 for entry in known_renderers[key][0](video_items_renderer):
3904 yield entry
9ba5705a 3905 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
a1b535bd 3906 break
3907 if video_items_renderer:
3908 continue
8bdd16b4 3909 break
9558dcec 3910
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab that is flagged as selected.

    Each element of ``tabs`` is looked up under ``tabRenderer`` or
    ``expandableTabRenderer``; the first renderer whose ``selected`` value
    is exactly ``True`` is returned.

    Raises ExtractorError when no tab is marked as selected.
    """
    renderer_keys = ('tabRenderer', 'expandableTabRenderer')
    for candidate in tabs:
        tab_renderer = dict_get(candidate, renderer_keys) or {}
        if tab_renderer.get('selected') is not True:
            continue
        return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3919
@classmethod
def _extract_uploader(cls, data):
    """Collect uploader fields from the playlist's secondary sidebar renderer.

    Returns a dict that may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``; keys whose value is ``None`` are dropped. An empty
    dict is returned when no video owner entry is found.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner_run = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner_run:
        return {}

    fields = {
        'uploader': owner_run.get('text'),
        'uploader_id': try_get(
            owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {name: value for name, value in fields.items() if value is not None}
8bdd16b4 3934
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a page that exposes tabs.

    Metadata is read from ``channelMetadataRenderer`` when present and from
    ``playlistMetadataRenderer`` otherwise. The playlist id falls back to
    ``item_id`` and the title falls back to the hashtag header text or the
    playlist id; the selected tab's ``title`` / ``expandedText`` are
    appended to the title. Entries come from ``self._entries`` for the
    selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    meta_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta_renderer:
        channel_name = meta_renderer.get('title')
        channel_url = meta_renderer.get('channelUrl')
        channel_id = meta_renderer.get('externalId')
    else:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []
    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        # Only set for channel pages; stays None (-> item_id below) for playlists
        playlist_id = channel_id
        tags = meta_renderer.get('keywords', '').split()
        raw_thumbnails = try_get(meta_renderer, lambda x: x['avatar']['thumbnails'], list)
        if not raw_thumbnails:
            raw_thumbnails = try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
        if not raw_thumbnails:
            raw_thumbnails = []

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag_title = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag_title or playlist_id
    title = title + format_field(selected_tab, 'title', ' - %s')
    title = title + format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        # No channel metadata: try the playlist sidebar's owner information instead
        metadata.update(self._extract_uploader(data))
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url'],
    })

    ytcfg = self.extract_ytcfg(item_id, webpage)
    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    entries = self._entries(selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 4009
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a Mix playlist, requesting further pages from the 'next' endpoint.

    @param playlist: playlist renderer dict of the current page
    @param playlist_id: id used for the API query and for progress messages
    @param data: initial page data (used to extract the account sync id)
    @param webpage: page HTML (used to extract ytcfg and the identity token)

    Iteration stops when a page has no videos, when a page contains nothing
    after the last video already yielded, or when the very first video
    shows up again.
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        # (index 0 when that video is not part of this page)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item of the page may lack a watch endpoint; fall back to an
        # empty dict so the per-field defaults below apply instead of
        # raising AttributeError on None
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 4045
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a watch-page playlist renderer.

    When the playlist's endpoint URL differs from ``url`` the extraction is
    delegated to YoutubeTabIE through a url_result; otherwise the playlist
    is extracted as a Mix.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4063
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its primary sidebar renderer.
    Labels are taken from the badges and, if present, from the selected entry
    of the privacy dropdown.
    Note: privacy flags stay None (unknown) unless a matching label is found;
    a playlist is never assumed to be public.
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for entry in dropdown_entries:
        selected = try_get(
            entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool)
        if selected:
            selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
            if selected_label:
                labels.add(selected_label.lower())
                break

    is_private = is_unlisted = None
    for badge in labels:
        if badge == 'public':
            is_unlisted = is_private = False
        elif badge == 'private':
            is_private = True
        elif badge == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4095
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy ``info_renderer`` entry among the playlist sidebar items.

    Returns None when the sidebar is missing or no item has a matching
    value of ``expected_type``.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    matches = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((found for found in matches if found), None)
4104
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again through the API so that unavailable videos are included.

    The browse id and params are taken from the 'show unavailable videos'
    menu item when it exists; otherwise default values are used.
    Returns None when the page has no primary playlist sidebar renderer.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_renderer = menu_item.get('menuNavigationItemRenderer')
        label = try_get(
            nav_renderer, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break
    browse_id = browse_endpoint.get('browseId')
    params = browse_endpoint.get('params')

    ytcfg = self.extract_ytcfg(item_id, webpage)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    identity_token = self._extract_identity_token(webpage, item_id=item_id)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=account_syncid,
        identity_token=identity_token, visitor_data=visitor_data)
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4143
def _extract_webpage(self, url, item_id):
    """Download the page and its initial data, retrying while the data is incomplete.

    The data counts as complete once it has a 'contents' or
    'currentVideoEndpoint' entry. Up to ``extractor_retries`` (default 3)
    additional attempts are made; alerts in the data are reported after
    each incomplete attempt.

    Returns a ``(webpage, data)`` tuple.
    Raises ExtractorError when the last attempt is still incomplete.
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self.extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
4165
@staticmethod
def _smuggle_data(entries, data):
    """Lazily yield ``entries``, smuggling ``data`` into each entry's url when ``data`` is truthy."""
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
4172
def _real_extract(self, url):
    """Unsmuggle the URL, run the actual extraction and pass the smuggled data on to the entries."""
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True

    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        # Re-attach the smuggled data so each entry's extractor can see it
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4181
# Splits a URL into the part matched by _VALID_URL ('pre'), an optional
# '/<tab>' path component ('tab') and the remainder ('post').
# The conditional group means 'tab' is only attempted when the
# 'channel_type' group matched - presumably that group is defined inside
# _VALID_URL; verify there.
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4183
def __real_extract(self, url, smuggled_data):
    """Normalize the URL, download the page and dispatch to the matching extraction routine.

    Steps, in order:
      1. Force the host to www.youtube.com and split the URL with ``_url_re``.
      2. Rewrite YouTube Music channel ids (VL.../MP.../browse) and redirect
         bare channel URLs to their '/videos' tab (unless disabled by the
         'no-youtube-channel-redirect' compat option).
      3. Handle watch URLs: a playlist-only watch URL is turned into a
         playlist URL; with both video and playlist ids, 'noplaylist'
         decides whether only the video is returned.
      4. Download the page; if '/videos' was requested but another tab came
         back, try the equivalent 'UU' playlist for 'UC' channel ids.
      5. Optionally reload with unavailable videos, report alerts, then
         return the result for tabs, a watch-page playlist, or a single video.

    @param url: URL matching this extractor, already unsmuggled
    @param smuggled_data: dict of smuggled data; 'is_music_url' is read here

    Raises ExtractorError when the page cannot be recognized, or when the
    '/live' tab is requested and the page still shows tabs.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match groups as a dict, with unmatched groups mapped to '' instead of None
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Only switch over to the playlist once it is known to be usable
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)
    # data may have been replaced above, so the tabs are looked up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Neither tabs nor a playlist: fall back to a single video, if one is known
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4298
c5e8d7af 4299
class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and playlist URLs and hand them over to YoutubeTabIE."""
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE handles or that carry a video id ('v' parameter)."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return not has_video_id and super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the input to a canonical /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string when there is one; a bare id has none
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4384
4385
class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be short links that carry a playlist id into watch URLs for YoutubeTabIE."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent youtube.com/watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4424
4425
class YoutubeYtUserIE(InfoExtractor):
    """Map the 'ytuser:<name>' keyword to the user's youtube.com page."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /user/<id> URL."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4439
b05654f0 4440
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ':ytfav' keyword family to the 'LL' playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the 'LL' playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4458
4459
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor for the 'ytsearch' keyword, paging through the 'search' API endpoint."""
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` video results for ``query``, following continuations page by page."""
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        continuation = {}
        for page_num in itertools.count(1):
            request.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page keeps results under 'contents'; continuation pages
            # under 'onResponseReceivedCommands'
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [section]})

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Return a playlist (id and title both set to the query) with up to ``n`` results"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query, query)
75dff0ee 4527
c9ae7b95 4528
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Variant of YoutubeSearchIE for the "ytsearchdate" keyword; only the
    # identifying attributes and the search params differ.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Sent as the 'params' field of the search request by YoutubeSearchIE._entries
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4534
c9ae7b95 4535
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Extract search results from a youtube.com/results URL."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own ``_VALID_URL``."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query ('search_query' or 'q') and optional 'sp' params from the URL and run the search."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = params.get('search_query') or params.get('q')
        query = search_terms[0]
        # Stored on the instance because _entries reads self._SEARCH_PARAMS
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4562
4563
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    Each subclass has to provide the _FEED_NAME property, which is used both
    for IE_NAME and for the feed URL.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/<_FEED_NAME> URL."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4580
4581
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ':ytwatchlater' keyword to the 'WL' playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the 'WL' playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4594
4595
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the 'recommended' feed; extraction itself is
    # inherited from YoutubeFeedsInfoExtractor.
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Matches the bare youtube.com home page as well as the ':ytrec' / ':ytrecommended' keywords
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the base class value (True)
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4611
1ed5b5c9 4612
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the 'subscriptions' feed; extraction itself is
    # inherited from YoutubeFeedsInfoExtractor.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Accepts ':ytsub', ':ytsubs', ':ytsubscription' and ':ytsubscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4624
1ed5b5c9 4625
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the 'history' feed; extraction itself is
    # inherited from YoutubeFeedsInfoExtractor.
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    # Accepts ':ythis' and ':ythistory'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4634
4635
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch YouTube watch URLs that lost their ``v=`` parameter.

    Such URLs typically result from an unquoted ``&`` on the command line:
    the shell splits the URL there, leaving only a leading query parameter
    (or nothing at all) after ``watch?``. Extraction always fails with an
    explanatory, expected error.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches a watch URL whose query is empty or consists of exactly one of
    # the listed non-video parameters, or an attribution_link URL with only
    # its ``a=`` parameter; the trailing ``$`` ensures nothing else follows.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        # The example commands name this program (yt-dlp), not its upstream
        # youtube-dl, so users can copy them verbatim.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4683
4684
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is only 1 to 10 characters long.

    Extraction always fails with an expected error that reports the partial
    ID and the offending URL.
    """

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        # Extract the partial ID purely so the error message can show it.
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)