]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube] Force `hl=en` for comments (#594)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
2d30521a 37 float_or_none,
11f9be09 38 format_field,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
94278f72 41 mimetype2ext,
9c0d7f49 42 network_exceptions,
11f9be09 43 orderedSet,
6310acf5 44 parse_codecs,
49bd8c66 45 parse_count,
7c80519c 46 parse_duration,
7ea65411 47 parse_iso8601,
dca3ff4a 48 qualities,
3995d37d 49 remove_start,
cf7e015f 50 smuggle_url,
dbdaaa23 51 str_or_none,
c93d53f5 52 str_to_int,
7c365c21 53 traverse_obj,
556dbe7f 54 try_get,
c5e8d7af
PH
55 unescapeHTML,
56 unified_strdate,
cf7e015f 57 unsmuggle_url,
8bdd16b4 58 update_url_query,
21c340b8 59 url_or_none,
6e6bc8da 60 urlencode_postdata,
fe93e2c4 61 urljoin,
7c365c21 62 variadic,
c5e8d7af
PH
63)
64
5f6a1245 65
def parse_qs(url):
    """Return the parsed query-string parameters of *url*."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
68
69
de7f3446 70class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
71 """Provide base functions for Youtube extractors"""
72 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
9303ce3e 73 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
e00eb564
S
74
75 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
3995d37d
S
76 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
77 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
e00eb564 78
3462ffa8 79 _RESERVED_NAMES = (
bea74222 80 r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
46953e7e 81 r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
cd7c66cf 82 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 83
b2e8bc1b
JMF
84 _NETRC_MACHINE = 'youtube'
85 # If True it will raise an error if no login info is provided
86 _LOGIN_REQUIRED = False
87
70d5c17b 88 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 89
b2e8bc1b 90 def _login(self):
83317f69 91 """
92 Attempt to log in to YouTube.
93 True is returned if successful or skipped.
94 False is returned if login failed.
95
96 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
97 """
9d5d4d64 98
99 def warn(message):
100 self.report_warning(message)
101
102 # username+password login is broken
982ee69a
MB
103 if (self._LOGIN_REQUIRED
104 and self.get_param('cookiefile') is None
105 and self.get_param('cookiesfrombrowser') is None):
9d5d4d64 106 self.raise_login_required(
107 'Login details are needed to download this content', method='cookies')
68217024 108 username, password = self._get_login_info()
9d5d4d64 109 if username:
110 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
111 return
9d5d4d64 112
2d6659b9 113 # Everything below this is broken!
114 r'''
b2e8bc1b
JMF
115 # No authentication to be performed
116 if username is None:
a06916d9 117 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 118 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 119 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 120 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 121 return True
b2e8bc1b 122
7cc3570e
PH
123 login_page = self._download_webpage(
124 self._LOGIN_URL, None,
69ea8ca4
PH
125 note='Downloading login page',
126 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
127 if login_page is False:
128 return
b2e8bc1b 129
1212e997 130 login_form = self._hidden_inputs(login_page)
c5e8d7af 131
e00eb564
S
132 def req(url, f_req, note, errnote):
133 data = login_form.copy()
134 data.update({
135 'pstMsg': 1,
136 'checkConnection': 'youtube',
137 'checkedDomains': 'youtube',
138 'hl': 'en',
139 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 140 'f.req': json.dumps(f_req),
e00eb564
S
141 'flowName': 'GlifWebSignIn',
142 'flowEntry': 'ServiceLogin',
baf67a60
S
143 # TODO: reverse actual botguard identifier generation algo
144 'bgRequest': '["identifier",""]',
041bc3ad 145 })
e00eb564
S
146 return self._download_json(
147 url, None, note=note, errnote=errnote,
148 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
149 fatal=False,
150 data=urlencode_postdata(data), headers={
151 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
152 'Google-Accounts-XSRF': 1,
153 })
154
3995d37d
S
155 lookup_req = [
156 username,
157 None, [], None, 'US', None, None, 2, False, True,
158 [
159 None, None,
160 [2, 1, None, 1,
161 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
162 None, [], 4],
163 1, [None, None, []], None, None, None, True
164 ],
165 username,
166 ]
167
e00eb564 168 lookup_results = req(
3995d37d 169 self._LOOKUP_URL, lookup_req,
e00eb564
S
170 'Looking up account info', 'Unable to look up account info')
171
172 if lookup_results is False:
173 return False
041bc3ad 174
3995d37d
S
175 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
176 if not user_hash:
177 warn('Unable to extract user hash')
178 return False
179
180 challenge_req = [
181 user_hash,
182 None, 1, None, [1, None, None, None, [password, None, True]],
183 [
184 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
185 1, [None, None, []], None, None, None, True
186 ]]
83317f69 187
3995d37d
S
188 challenge_results = req(
189 self._CHALLENGE_URL, challenge_req,
190 'Logging in', 'Unable to log in')
83317f69 191
3995d37d 192 if challenge_results is False:
e00eb564 193 return
83317f69 194
3995d37d
S
195 login_res = try_get(challenge_results, lambda x: x[0][5], list)
196 if login_res:
197 login_msg = try_get(login_res, lambda x: x[5], compat_str)
198 warn(
199 'Unable to login: %s' % 'Invalid password'
200 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
201 return False
202
203 res = try_get(challenge_results, lambda x: x[0][-1], list)
204 if not res:
205 warn('Unable to extract result entry')
206 return False
207
9a6628aa
S
208 login_challenge = try_get(res, lambda x: x[0][0], list)
209 if login_challenge:
210 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
211 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
212 # SEND_SUCCESS - TFA code has been successfully sent to phone
213 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 214 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
215 if status == 'QUOTA_EXCEEDED':
216 warn('Exceeded the limit of TFA codes, try later')
217 return False
218
219 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
220 if not tl:
221 warn('Unable to extract TL')
222 return False
223
224 tfa_code = self._get_tfa_info('2-step verification code')
225
226 if not tfa_code:
227 warn(
228 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
229 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
230 return False
231
232 tfa_code = remove_start(tfa_code, 'G-')
233
234 tfa_req = [
235 user_hash, None, 2, None,
236 [
237 9, None, None, None, None, None, None, None,
238 [None, tfa_code, True, 2]
239 ]]
240
241 tfa_results = req(
242 self._TFA_URL.format(tl), tfa_req,
243 'Submitting TFA code', 'Unable to submit TFA code')
244
245 if tfa_results is False:
246 return False
247
248 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
249 if tfa_res:
250 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
251 warn(
252 'Unable to finish TFA: %s' % 'Invalid TFA code'
253 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
254 return False
255
256 check_cookie_url = try_get(
257 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
258 else:
259 CHALLENGES = {
260 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
261 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
262 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
263 }
264 challenge = CHALLENGES.get(
265 challenge_str,
266 '%s returned error %s.' % (self.IE_NAME, challenge_str))
267 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
268 return False
3995d37d
S
269 else:
270 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
271
272 if not check_cookie_url:
273 warn('Unable to extract CheckCookie URL')
274 return False
e00eb564
S
275
276 check_cookie_results = self._download_webpage(
3995d37d
S
277 check_cookie_url, None, 'Checking cookie', fatal=False)
278
279 if check_cookie_results is False:
280 return False
e00eb564 281
3995d37d
S
282 if 'https://myaccount.google.com/' not in check_cookie_results:
283 warn('Unable to log in')
b2e8bc1b 284 return False
e00eb564 285
b2e8bc1b 286 return True
2d6659b9 287 '''
b2e8bc1b 288
cce889b9 289 def _initialize_consent(self):
290 cookies = self._get_cookies('https://www.youtube.com/')
291 if cookies.get('__Secure-3PSID'):
292 return
293 consent_id = None
294 consent = cookies.get('CONSENT')
295 if consent:
296 if 'YES' in consent.value:
297 return
298 consent_id = self._search_regex(
299 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
300 if not consent_id:
301 consent_id = random.randint(100, 999)
302 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 303
b2e8bc1b 304 def _real_initialize(self):
cce889b9 305 self._initialize_consent()
b2e8bc1b
JMF
306 if self._downloader is None:
307 return
b2e8bc1b
JMF
308 if not self._login():
309 return
c5e8d7af 310
a0566bbf 311 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 312 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
313 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 314
109dd3b2 315 _YT_DEFAULT_YTCFGS = {
316 'WEB': {
317 'INNERTUBE_API_VERSION': 'v1',
318 'INNERTUBE_CLIENT_NAME': 'WEB',
319 'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
320 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
321 'INNERTUBE_CONTEXT': {
322 'client': {
323 'clientName': 'WEB',
324 'clientVersion': '2.20210622.10.00',
325 'hl': 'en',
326 }
327 },
328 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
329 },
c0bc527b
M
330 'WEB_AGEGATE': {
331 'INNERTUBE_API_VERSION': 'v1',
332 'INNERTUBE_CLIENT_NAME': 'WEB',
333 'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
334 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
335 'INNERTUBE_CONTEXT': {
336 'client': {
337 'clientName': 'WEB',
338 'clientVersion': '2.20210622.10.00',
339 'clientScreen': 'EMBED',
340 'hl': 'en',
341 }
342 },
343 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
344 },
109dd3b2 345 'WEB_REMIX': {
346 'INNERTUBE_API_VERSION': 'v1',
347 'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
348 'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
349 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
350 'INNERTUBE_CONTEXT': {
351 'client': {
352 'clientName': 'WEB_REMIX',
353 'clientVersion': '1.20210621.00.00',
354 'hl': 'en',
355 }
356 },
357 'INNERTUBE_CONTEXT_CLIENT_NAME': 67
358 },
359 'WEB_EMBEDDED_PLAYER': {
360 'INNERTUBE_API_VERSION': 'v1',
361 'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
362 'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
363 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
364 'INNERTUBE_CONTEXT': {
365 'client': {
366 'clientName': 'WEB_EMBEDDED_PLAYER',
367 'clientVersion': '1.20210620.0.1',
368 'hl': 'en',
369 }
370 },
371 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
372 },
373 'ANDROID': {
374 'INNERTUBE_API_VERSION': 'v1',
375 'INNERTUBE_CLIENT_NAME': 'ANDROID',
376 'INNERTUBE_CLIENT_VERSION': '16.20',
377 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
378 'INNERTUBE_CONTEXT': {
379 'client': {
380 'clientName': 'ANDROID',
381 'clientVersion': '16.20',
382 'hl': 'en',
383 }
384 },
fe93e2c4 385 'INNERTUBE_CONTEXT_CLIENT_NAME': 3
109dd3b2 386 },
c0bc527b
M
387 'ANDROID_AGEGATE': {
388 'INNERTUBE_API_VERSION': 'v1',
389 'INNERTUBE_CLIENT_NAME': 'ANDROID',
390 'INNERTUBE_CLIENT_VERSION': '16.20',
391 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
392 'INNERTUBE_CONTEXT': {
393 'client': {
394 'clientName': 'ANDROID',
395 'clientVersion': '16.20',
396 'clientScreen': 'EMBED',
397 'hl': 'en',
398 }
399 },
400 'INNERTUBE_CONTEXT_CLIENT_NAME': 3
401 },
109dd3b2 402 'ANDROID_EMBEDDED_PLAYER': {
403 'INNERTUBE_API_VERSION': 'v1',
404 'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
405 'INNERTUBE_CLIENT_VERSION': '16.20',
406 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
407 'INNERTUBE_CONTEXT': {
408 'client': {
409 'clientName': 'ANDROID_EMBEDDED_PLAYER',
410 'clientVersion': '16.20',
411 'hl': 'en',
412 }
413 },
fe93e2c4 414 'INNERTUBE_CONTEXT_CLIENT_NAME': 55
109dd3b2 415 },
416 'ANDROID_MUSIC': {
417 'INNERTUBE_API_VERSION': 'v1',
418 'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
419 'INNERTUBE_CLIENT_VERSION': '4.32',
420 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
421 'INNERTUBE_CONTEXT': {
422 'client': {
423 'clientName': 'ANDROID_MUSIC',
424 'clientVersion': '4.32',
425 'hl': 'en',
426 }
427 },
fe93e2c4 428 'INNERTUBE_CONTEXT_CLIENT_NAME': 21
11f9be09 429 },
430 'IOS': {
431 'INNERTUBE_API_VERSION': 'v1',
432 'INNERTUBE_CLIENT_NAME': 'IOS',
433 'INNERTUBE_CLIENT_VERSION': '16.20',
434 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
435 'INNERTUBE_CONTEXT': {
436 'client': {
437 'clientName': 'IOS',
438 'clientVersion': '16.20',
439 'hl': 'en',
440 }
441 },
442 'INNERTUBE_CONTEXT_CLIENT_NAME': 5
c0bc527b
M
443 },
444 'IOS_AGEGATE': {
445 'INNERTUBE_API_VERSION': 'v1',
446 'INNERTUBE_CLIENT_NAME': 'IOS',
447 'INNERTUBE_CLIENT_VERSION': '16.20',
448 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
449 'INNERTUBE_CONTEXT': {
450 'client': {
451 'clientName': 'IOS',
452 'clientVersion': '16.20',
453 'clientScreen': 'EMBED',
454 'hl': 'en',
455 }
456 },
457 'INNERTUBE_CONTEXT_CLIENT_NAME': 5
11f9be09 458 },
459 'IOS_MUSIC': {
460 'INNERTUBE_API_VERSION': 'v1',
461 'INNERTUBE_CLIENT_NAME': 'IOS_MUSIC',
462 'INNERTUBE_CLIENT_VERSION': '4.32',
463 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
464 'INNERTUBE_CONTEXT': {
465 'client': {
466 'clientName': 'IOS_MUSIC',
467 'clientVersion': '4.32',
468 'hl': 'en',
469 }
470 },
471 'INNERTUBE_CONTEXT_CLIENT_NAME': 26
472 },
473 'IOS_MESSAGES_EXTENSION': {
474 'INNERTUBE_API_VERSION': 'v1',
475 'INNERTUBE_CLIENT_NAME': 'IOS_MESSAGES_EXTENSION',
476 'INNERTUBE_CLIENT_VERSION': '16.20',
477 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
478 'INNERTUBE_CONTEXT': {
479 'client': {
480 'clientName': 'IOS_MESSAGES_EXTENSION',
481 'clientVersion': '16.20',
482 'hl': 'en',
483 }
484 },
485 'INNERTUBE_CONTEXT_CLIENT_NAME': 66
5a1fc62b 486 },
487 'MWEB': {
488 'INNERTUBE_API_VERSION': 'v1',
489 'INNERTUBE_CLIENT_NAME': 'MWEB',
490 'INNERTUBE_CLIENT_VERSION': '2.20210721.07.00',
491 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
492 'INNERTUBE_CONTEXT': {
493 'client': {
494 'clientName': 'MWEB',
495 'clientVersion': '2.20210721.07.00',
496 'hl': 'en',
497 }
498 },
499 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
500 },
c0bc527b
M
501 'MWEB_AGEGATE': {
502 'INNERTUBE_API_VERSION': 'v1',
503 'INNERTUBE_CLIENT_NAME': 'MWEB',
504 'INNERTUBE_CLIENT_VERSION': '2.20210721.07.00',
505 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
506 'INNERTUBE_CONTEXT': {
507 'client': {
508 'clientName': 'MWEB',
509 'clientVersion': '2.20210721.07.00',
510 'clientScreen': 'EMBED',
511 'hl': 'en',
512 }
513 },
514 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
515 },
109dd3b2 516 }
517
518 _YT_DEFAULT_INNERTUBE_HOSTS = {
519 'DIRECT': 'youtubei.googleapis.com',
520 'WEB': 'www.youtube.com',
521 'WEB_REMIX': 'music.youtube.com',
522 'ANDROID_MUSIC': 'music.youtube.com'
523 }
524
11f9be09 525 # clients starting with _ cannot be explicitly requested by the user
526 _YT_CLIENTS = {
11f9be09 527 'android': 'ANDROID',
528 'android_music': 'ANDROID_MUSIC',
c0bc527b
M
529 'android_embedded': 'ANDROID_EMBEDDED_PLAYER',
530 'android_agegate': 'ANDROID_AGEGATE',
11f9be09 531 'ios': 'IOS',
532 'ios_music': 'IOS_MUSIC',
c0bc527b
M
533 'ios_embedded': 'IOS_MESSAGES_EXTENSION',
534 'ios_agegate': 'IOS_AGEGATE',
b4c055ba 535 'web': 'WEB',
536 'web_music': 'WEB_REMIX',
c0bc527b
M
537 'web_embedded': 'WEB_EMBEDDED_PLAYER',
538 'web_agegate': 'WEB_AGEGATE',
539 'mweb': 'MWEB',
540 'mweb_agegate': 'MWEB_AGEGATE',
11f9be09 541 }
542
109dd3b2 543 def _get_default_ytcfg(self, client='WEB'):
544 if client in self._YT_DEFAULT_YTCFGS:
545 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
546 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
547 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
548
def _get_innertube_host(self, client='WEB'):
    """Look up the Innertube API host for *client*, with the WEB host as fallback key."""
    lookup_order = (client, 'WEB')
    return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, lookup_order)
551
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
    """try_get() on *ytcfg*, falling back to the default ytcfg of *default_client*.

    The fallback is consulted whenever the value taken from *ytcfg* is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
556
def _extract_client_name(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_NAME from *ytcfg*, or from the defaults of *default_client*."""
    def client_name(cfg):
        return cfg['INNERTUBE_CLIENT_NAME']

    return self._ytcfg_get_safe(ytcfg, client_name, compat_str, default_client)
559
@staticmethod
def _extract_session_index(*data):
    """Return the first SESSION_INDEX among *data* that converts to an int, else None."""
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
314ee305 566
def _extract_client_version(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_VERSION from *ytcfg*, or from the defaults of *default_client*."""
    def client_version(cfg):
        return cfg['INNERTUBE_CLIENT_VERSION']

    return self._ytcfg_get_safe(ytcfg, client_version, compat_str, default_client)
569
def _extract_api_key(self, ytcfg=None, default_client='WEB'):
    """Return INNERTUBE_API_KEY from *ytcfg*, or from the defaults of *default_client*."""
    def api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key, compat_str, default_client)
572
def _extract_context(self, ytcfg=None, default_client='WEB'):
    """Return the INNERTUBE_CONTEXT dict to send with API requests.

    The context found in *ytcfg* is returned as-is when present. Otherwise a
    copy of the default context for *default_client* is used; if a ytcfg was
    given, its client name, client version and VISITOR_DATA are merged in.
    """
    def innertube_context(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    own_context = innertube_context(ytcfg)
    if own_context:
        return own_context

    fallback = innertube_context(self._get_default_ytcfg(default_client))
    if ytcfg:
        # Recreate the client context (required)
        fallback['client'].update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            fallback['client']['visitorData'] = visitor_data
    return fallback
592
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the 'SAPISIDHASH ...' Authorization value for *origin*.

    Returns None when neither a '__Secure-3PAPISID' nor a 'SAPISID' cookie
    with a value is available for youtube.com.
    """
    cookies = self._get_cookies('https://www.youtube.com')
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    sapisid = dict_get(cookies, ('__Secure-3PAPISID', 'SAPISID'))
    if sapisid is None or not sapisid.value:
        return None

    timestamp = round(time.time())
    # SAPISID cookie is required if not already present
    if not cookies.get('SAPISID'):
        self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie', only_once=True)
        self._set_cookie(
            '.youtube.com', 'SAPISID', sapisid.value, secure=True, expire_time=timestamp + 3600)
    self.write_debug('Extracted SAPISID cookie', only_once=True)

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    hash_input = f'{timestamp} {sapisid.value} {origin}'.encode('utf-8')
    digest = hashlib.sha1(hash_input).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'
a5c56234
M
612
613 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 614 note='Downloading API JSON', errnote='Unable to download API page',
109dd3b2 615 context=None, api_key=None, api_hostname=None, default_client='WEB'):
f4f751af 616
109dd3b2 617 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 618 data.update(query)
11f9be09 619 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 620 real_headers.update({'content-type': 'application/json'})
621 if headers:
622 real_headers.update(headers)
545cc85d 623 return self._download_json(
109dd3b2 624 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 625 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 626 data=json.dumps(data).encode('utf8'), headers=real_headers,
627 query={'key': api_key or self._extract_api_key()})
628
def extract_yt_initial_data(self, video_id, webpage):
    """Find the ytInitialData JSON object in *webpage* and return it parsed."""
    # Prefer the match that is followed by a known boundary, then the bare one
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE)
    raw_json = self._search_regex(patterns, webpage, 'yt initial data')
    return self._parse_json(raw_json, video_id)
0c148415 635
a1c5d2ca 636 def _extract_identity_token(self, webpage, item_id):
11f9be09 637 if not webpage:
638 return None
639 ytcfg = self.extract_ytcfg(item_id, webpage)
a1c5d2ca
M
640 if ytcfg:
641 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
642 if token:
643 return token
644 return self._search_regex(
645 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
646 'identity token', default=None)
647
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'])
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split("||")
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
a1c5d2ca 666
def extract_ytcfg(self, video_id, webpage):
    """Return the dict passed to ytcfg.set(...) in *webpage*, or {} if unavailable."""
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed or {}
674
def generate_api_headers(
        self, ytcfg=None, identity_token=None, account_syncid=None,
        visitor_data=None, api_hostname=None, default_client='WEB', session_index=None):
    """Assemble the HTTP headers for an Innertube API request.

    Client name/version come from *ytcfg* or the defaults of *default_client*.
    Identity token, page id, auth-user, visitor id and the SAPISIDHASH
    authorization are added only when the corresponding value is available.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(default_client))
    client_name_id = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name_id),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin
    }

    if ytcfg and not visitor_data:
        visitor_data = try_get(
            self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid

    if session_index is None and ytcfg:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Index 0 is used when a page id is sent without a known session index
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return headers
29f7c58a 703
2d6659b9 704 @staticmethod
705 def _build_api_continuation_query(continuation, ctp=None):
706 query = {
707 'continuation': continuation
708 }
709 # TODO: Inconsistency with clickTrackingParams.
710 # Currently we have a fixed ctp contained within context (from ytcfg)
711 # and a ctp in root query for continuation.
712 if ctp:
713 query['clickTracking'] = {'clickTrackingParams': ctp}
714 return query
715
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from next/reload continuation data in *renderer*.

    Returns None when no such data or no continuation token is found.
    """
    continuation_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'])
    continuation_data = try_get(renderer, continuation_getters, dict)
    if not continuation_data:
        return None
    token = continuation_data.get('continuation')
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
2d6659b9 728
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when *continuation_ep* is not a dict or carries no token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
2d6659b9 738
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None if it has none.

    Next/reload continuation data is tried first; after that the entries of
    'contents' and 'items' are scanned for a continuation item renderer.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    children = []
    for key in ('contents', 'items'):
        children.extend(try_get(renderer, lambda x: x[key], list) or [])

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'])
    for child in children:
        if not isinstance(child, dict):
            continue
        endpoint = try_get(child, endpoint_getters, dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query
759
@classmethod
def _extract_alerts(cls, data):
    """Yield (alert_type, message) pairs for the entries of data['alerts'].

    Entries that are not dicts, alerts without a 'type' and alerts without
    any extractable text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A non-dict value has no .get(); skip it instead of failing
            # with AttributeError in the middle of the extraction.
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
772
773 def _report_alerts(self, alerts, expected=True):
774 errors = []
775 warnings = []
776 for alert_type, alert_message in alerts:
777 if alert_type.lower() == 'error':
778 errors.append([alert_type, alert_message])
779 else:
780 warnings.append([alert_type, alert_message])
781
782 for alert_type, alert_message in (warnings + errors[:-1]):
783 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
784 if errors:
785 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
786
787 def _extract_and_report_alerts(self, data, *args, **kwargs):
788 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
789
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadata badge labels found in *renderer*."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
797
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found under any of the given paths.

    Each path is resolved with traverse_obj(); without paths, *data* itself
    is inspected. For every candidate the 'simpleText' is preferred, then the
    joined 'text' of its 'runs' (or of the candidate itself if it is a list),
    limited to the first *max_runs* runs when given. Returns None if nothing
    yields text.
    """
    def text_of(node):
        simple = try_get(node, lambda x: x['simpleText'], compat_str)
        if simple:
            return simple
        runs = try_get(node, lambda x: x['runs'], list) or []
        if not runs and isinstance(node, list):
            runs = node
        limit = max_runs or len(runs)
        runs = runs[:min(len(runs), limit)]
        return ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))

    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only paths with '...' or alternative keys are iterated over as
            # a list of results; any other result is treated as one object.
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]
        for candidate in candidates:
            text = text_of(candidate)
            if text:
                return text
47193e02 819
109dd3b2 820 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
821 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
822 default_client='WEB'):
823 response = None
824 last_error = None
825 count = -1
826 retries = self.get_param('extractor_retries', 3)
827 if check_get_keys is None:
828 check_get_keys = []
829 while count < retries:
830 count += 1
831 if last_error:
832 self.report_warning('%s. Retrying ...' % last_error)
833 try:
834 response = self._call_api(
835 ep=ep, fatal=True, headers=headers,
836 video_id=item_id, query=query,
837 context=self._extract_context(ytcfg, default_client),
838 api_key=self._extract_api_key(ytcfg, default_client),
839 api_hostname=api_hostname, default_client=default_client,
840 note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
841 except ExtractorError as e:
9c0d7f49 842 if isinstance(e.cause, network_exceptions):
109dd3b2 843 # Downloading page may result in intermittent 5xx HTTP error
844 # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
9c0d7f49 845 # We also want to catch all other network exceptions since errors in later pages can be troublesome
846 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
847 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
848 last_error = error_to_compat_str(e.cause or e)
849 if count < retries:
850 continue
109dd3b2 851 if fatal:
852 raise
853 else:
854 self.report_warning(error_to_compat_str(e))
855 return
856
857 else:
858 # Youtube may send alerts if there was an issue with the continuation page
859 try:
860 self._extract_and_report_alerts(response, expected=False)
861 except ExtractorError as e:
862 if fatal:
863 raise
864 self.report_warning(error_to_compat_str(e))
865 return
866 if not check_get_keys or dict_get(response, check_get_keys):
867 break
868 # Youtube sometimes sends incomplete data
869 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
870 last_error = 'Incomplete data received'
871 if count >= retries:
872 if fatal:
873 raise ExtractorError(last_error)
874 else:
875 self.report_warning(last_error)
876 return
877 return response
878
9297939e 879 @staticmethod
880 def is_music_url(url):
881 return re.match(r'https?://music\.youtube\.com/', url) is not None
882
def _extract_video(self, renderer):
    """
    Build a 'url' result for YoutubeIE from a video renderer dict.

    The text fields are read with self._get_text; the view count is the
    leading run of digits and commas in the whitespace-stripped
    'viewCountText', or None when there is no such run.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    # 'lengthText' is tried first, then the time-status thumbnail overlay
    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)

    compact_views = re.sub(r'\s', '', self._get_text(renderer, 'viewCountText') or '')
    leading_number = self._search_regex(
        r'^([\d,]+)', compact_views, 'view count', default=None)
    view_count = str_to_int(leading_number)

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
907
0c148415 908
360e1ca5 909class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 910 IE_DESC = 'YouTube.com'
bc2ca1bb 911 _INVIDIOUS_SITES = (
912 # invidious-redirect websites
913 r'(?:www\.)?redirect\.invidious\.io',
914 r'(?:(?:www|dev)\.)?invidio\.us',
915 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
916 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 917 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 918 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 919 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 920 # youtube-dl invidious instances list
921 r'(?:(?:www|no)\.)?invidiou\.sh',
922 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
923 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 924 r'(?:www\.)?invidious\.mastodon\.host',
925 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 926 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 927 r'(?:www\.)?invidious\.tinfoil-hat\.net',
928 r'(?:www\.)?invidious\.himiko\.cloud',
929 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 930 r'(?:www\.)?invidious\.tube',
931 r'(?:www\.)?invidiou\.site',
932 r'(?:www\.)?invidious\.site',
933 r'(?:www\.)?invidious\.xyz',
934 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 935 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 936 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 937 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 938 r'(?:www\.)?tube\.poal\.co',
939 r'(?:www\.)?tube\.connect\.cafe',
940 r'(?:www\.)?vid\.wxzm\.sx',
941 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 942 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 943 r'(?:www\.)?yewtu\.be',
944 r'(?:www\.)?yt\.elukerio\.org',
945 r'(?:www\.)?yt\.lelux\.fi',
946 r'(?:www\.)?invidious\.ggc-project\.de',
947 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 948 r'(?:www\.)?ytprivate\.com',
949 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 950 r'(?:www\.)?invidious\.toot\.koeln',
951 r'(?:www\.)?invidious\.fdn\.fr',
952 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 953 r'(?:www\.)?invidious\.namazso\.eu',
954 r'(?:www\.)?invidious\.silkky\.cloud',
955 r'(?:www\.)?invidious\.exonip\.de',
956 r'(?:www\.)?invidious\.riverside\.rocks',
957 r'(?:www\.)?invidious\.blamefran\.net',
958 r'(?:www\.)?invidious\.moomoo\.de',
959 r'(?:www\.)?ytb\.trom\.tf',
960 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 961 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
962 r'(?:www\.)?qklhadlycap4cnod\.onion',
963 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
964 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
965 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
966 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
967 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
968 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 969 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
970 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
971 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
972 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 973 )
cb7dfeea 974 _VALID_URL = r"""(?x)^
c5e8d7af 975 (
edb53e2d 976 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 977 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
978 (?:www\.)?deturl\.com/www\.youtube\.com|
979 (?:www\.)?pwnyoutube\.com|
980 (?:www\.)?hooktube\.com|
981 (?:www\.)?yourepeat\.com|
982 tube\.majestyc\.net|
983 %(invidious)s|
984 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
985 (?:.*?\#/)? # handle anchor (#/) redirect urls
986 (?: # the various things that can precede the ID:
ac7553d0 987 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 988 |(?: # or the v= param in all its forms
f7000f3a 989 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 990 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 991 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
992 v=
993 )
f4b05232 994 ))
cbaed4bb
S
995 |(?:
996 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
997 vid\.plus| # or vid.plus/xxxx
998 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 999 %(invidious)s
cbaed4bb 1000 )/
edb53e2d 1001 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 1002 )
c5e8d7af 1003 )? # all until now is optional -> you can pass the naked ID
201c1459 1004 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 1005 (?(1).+)? # if we found the ID, everything can follow
9297939e 1006 (?:\#|$)""" % {
bc2ca1bb 1007 'invidious': '|'.join(_INVIDIOUS_SITES),
1008 }
e40c758c 1009 _PLAYER_INFO_RE = (
cc2db878 1010 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1011 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 1012 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 1013 )
2c62dc26 1014 _formats = {
c2d3cb4c 1015 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1016 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1017 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1018 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1019 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1020 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1021 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1022 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 1023 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 1024 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1025 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1026 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1027 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1028 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1029 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 1030 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 1031 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1032 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 1033
1034
1035 # 3D videos
c2d3cb4c 1036 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1037 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1038 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1039 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 1040 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1041 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1042 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 1043
96fb5605 1044 # Apple HTTP Live Streaming
11f12195 1045 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 1046 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1047 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1048 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1049 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1050 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 1051 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1052 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
1053
1054 # DASH mp4 video
d23028a8
S
1055 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1056 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1057 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1058 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1059 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 1060 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
1061 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1062 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1063 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1064 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1065 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1066 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1067
f6f1fc92 1068 # Dash mp4 audio
d23028a8
S
1069 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1070 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1071 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1072 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1073 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1074 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1075 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1076
1077 # Dash webm
d23028a8
S
1078 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1079 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1080 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1081 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1082 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1083 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1084 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1085 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1086 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1087 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1088 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1089 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1090 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1091 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1092 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1093 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1094 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1095 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1096 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1097 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1098 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1099 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1100
1101 # Dash webm audio
d23028a8
S
1102 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1103 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1104
0857baad 1105 # Dash webm audio with opus inside
d23028a8
S
1106 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1107 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1108 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1109
ce6b9a2d
PH
1110 # RTMP (unnamed)
1111 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1112
1113 # av01 video only formats sometimes served with "unknown" codecs
1114 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1115 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1116 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1117 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 1118 }
29f7c58a 1119 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1120
109dd3b2 1121 _AGE_GATE_REASONS = (
1122 'Sign in to confirm your age',
1123 'This video may be inappropriate for some users.',
1124 'Sorry, this content is age-restricted.')
1125
fd5c4aab
S
1126 _GEO_BYPASS = False
1127
78caa52a 1128 IE_NAME = 'youtube'
2eb88d95
PH
1129 _TESTS = [
1130 {
2d3d2997 1131 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1132 'info_dict': {
1133 'id': 'BaW_jenozKc',
1134 'ext': 'mp4',
3867038a 1135 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1136 'uploader': 'Philipp Hagemeister',
1137 'uploader_id': 'phihag',
ec85ded8 1138 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
1139 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1140 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1141 'upload_date': '20121002',
3867038a 1142 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 1143 'categories': ['Science & Technology'],
3867038a 1144 'tags': ['youtube-dl'],
556dbe7f 1145 'duration': 10,
dbdaaa23 1146 'view_count': int,
3e7c1224
PH
1147 'like_count': int,
1148 'dislike_count': int,
7c80519c 1149 'start_time': 1,
297a564b 1150 'end_time': 9,
2eb88d95 1151 }
0e853ca4 1152 },
fccd3771 1153 {
4bc3a23e
PH
1154 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1155 'note': 'Embed-only video (#1746)',
1156 'info_dict': {
1157 'id': 'yZIXLfi8CZQ',
1158 'ext': 'mp4',
1159 'upload_date': '20120608',
1160 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1161 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1162 'uploader': 'SET India',
94bfcd23 1163 'uploader_id': 'setindia',
ec85ded8 1164 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1165 'age_limit': 18,
545cc85d 1166 },
1167 'skip': 'Private video',
fccd3771 1168 },
11b56058 1169 {
8bdd16b4 1170 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1171 'note': 'Use the first video ID in the URL',
1172 'info_dict': {
1173 'id': 'BaW_jenozKc',
1174 'ext': 'mp4',
3867038a 1175 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1176 'uploader': 'Philipp Hagemeister',
1177 'uploader_id': 'phihag',
ec85ded8 1178 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1179 'upload_date': '20121002',
3867038a 1180 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1181 'categories': ['Science & Technology'],
3867038a 1182 'tags': ['youtube-dl'],
556dbe7f 1183 'duration': 10,
dbdaaa23 1184 'view_count': int,
11b56058
PM
1185 'like_count': int,
1186 'dislike_count': int,
34a7de29
S
1187 },
1188 'params': {
1189 'skip_download': True,
1190 },
11b56058 1191 },
dd27fd17 1192 {
2d3d2997 1193 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1194 'note': '256k DASH audio (format 141) via DASH manifest',
1195 'info_dict': {
1196 'id': 'a9LDPn-MO4I',
1197 'ext': 'm4a',
1198 'upload_date': '20121002',
1199 'uploader_id': '8KVIDEO',
ec85ded8 1200 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1201 'description': '',
1202 'uploader': '8KVIDEO',
1203 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1204 },
4bc3a23e
PH
1205 'params': {
1206 'youtube_include_dash_manifest': True,
1207 'format': '141',
4919603f 1208 },
de3c7fe0 1209 'skip': 'format 141 not served anymore',
dd27fd17 1210 },
8bdd16b4 1211 # DASH manifest with encrypted signature
1212 {
1213 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1214 'info_dict': {
1215 'id': 'IB3lcPjvWLA',
1216 'ext': 'm4a',
1217 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1218 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1219 'duration': 244,
1220 'uploader': 'AfrojackVEVO',
1221 'uploader_id': 'AfrojackVEVO',
1222 'upload_date': '20131011',
cc2db878 1223 'abr': 129.495,
8bdd16b4 1224 },
1225 'params': {
1226 'youtube_include_dash_manifest': True,
1227 'format': '141/bestaudio[ext=m4a]',
1228 },
1229 },
dd2d55f1 1230 # Normal age-gate video (embed allowed)
c522adb1 1231 {
2d3d2997 1232 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1233 'info_dict': {
1234 'id': 'HtVdAasjOgU',
1235 'ext': 'mp4',
1236 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1237 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1238 'duration': 142,
c522adb1
JMF
1239 'uploader': 'The Witcher',
1240 'uploader_id': 'WitcherGame',
ec85ded8 1241 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1242 'upload_date': '20140605',
34952f09 1243 'age_limit': 18,
c522adb1
JMF
1244 },
1245 },
8bdd16b4 1246 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1247 # YouTube Red ad is not captured for creator
1248 {
1249 'url': '__2ABJjxzNo',
1250 'info_dict': {
1251 'id': '__2ABJjxzNo',
1252 'ext': 'mp4',
1253 'duration': 266,
1254 'upload_date': '20100430',
1255 'uploader_id': 'deadmau5',
1256 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1257 'creator': 'deadmau5',
1258 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1259 'uploader': 'deadmau5',
1260 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1261 'alt_title': 'Some Chords',
8bdd16b4 1262 },
1263 'expected_warnings': [
1264 'DASH manifest missing',
1265 ]
1266 },
067aa17e 1267 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1268 {
1269 'url': 'lqQg6PlCWgI',
1270 'info_dict': {
1271 'id': 'lqQg6PlCWgI',
1272 'ext': 'mp4',
556dbe7f 1273 'duration': 6085,
90227264 1274 'upload_date': '20150827',
cbe2bd91 1275 'uploader_id': 'olympic',
ec85ded8 1276 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1277 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1278 'uploader': 'Olympics',
cbe2bd91
PH
1279 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1280 },
1281 'params': {
1282 'skip_download': 'requires avconv',
e52a40ab 1283 }
cbe2bd91 1284 },
6271f1ca
PH
1285 # Non-square pixels
1286 {
1287 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1288 'info_dict': {
1289 'id': '_b-2C3KPAM0',
1290 'ext': 'mp4',
1291 'stretched_ratio': 16 / 9.,
556dbe7f 1292 'duration': 85,
6271f1ca
PH
1293 'upload_date': '20110310',
1294 'uploader_id': 'AllenMeow',
ec85ded8 1295 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1296 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1297 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1298 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1299 },
06b491eb
S
1300 },
1301 # url_encoded_fmt_stream_map is empty string
1302 {
1303 'url': 'qEJwOuvDf7I',
1304 'info_dict': {
1305 'id': 'qEJwOuvDf7I',
f57b7835 1306 'ext': 'webm',
06b491eb
S
1307 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1308 'description': '',
1309 'upload_date': '20150404',
1310 'uploader_id': 'spbelect',
1311 'uploader': 'Наблюдатели Петербурга',
1312 },
1313 'params': {
1314 'skip_download': 'requires avconv',
e323cf3f
S
1315 },
1316 'skip': 'This live event has ended.',
06b491eb 1317 },
067aa17e 1318 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1319 {
1320 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1321 'info_dict': {
1322 'id': 'FIl7x6_3R5Y',
eb6793ba 1323 'ext': 'webm',
da77d856
S
1324 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1325 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1326 'duration': 220,
da77d856
S
1327 'upload_date': '20150625',
1328 'uploader_id': 'dorappi2000',
ec85ded8 1329 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1330 'uploader': 'dorappi2000',
eb6793ba 1331 'formats': 'mincount:31',
da77d856 1332 },
eb6793ba 1333 'skip': 'not actual anymore',
2ee8f5d8 1334 },
8a1a26ce
YCH
1335 # DASH manifest with segment_list
1336 {
1337 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1338 'md5': '8ce563a1d667b599d21064e982ab9e31',
1339 'info_dict': {
1340 'id': 'CsmdDsKjzN8',
1341 'ext': 'mp4',
17ee98e1 1342 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1343 'uploader': 'Airtek',
1344 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1345 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1346 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1347 },
1348 'params': {
1349 'youtube_include_dash_manifest': True,
1350 'format': '135', # bestvideo
be49068d
S
1351 },
1352 'skip': 'This live event has ended.',
2ee8f5d8 1353 },
cf7e015f
S
1354 {
1355 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1356 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1357 'info_dict': {
545cc85d 1358 'id': 'jvGDaLqkpTg',
1359 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1360 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1361 },
1362 'playlist': [{
1363 'info_dict': {
545cc85d 1364 'id': 'jvGDaLqkpTg',
cf7e015f 1365 'ext': 'mp4',
545cc85d 1366 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1367 'description': 'md5:e03b909557865076822aa169218d6a5d',
1368 'duration': 10643,
1369 'upload_date': '20161111',
1370 'uploader': 'Team PGP',
1371 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1372 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1373 },
1374 }, {
1375 'info_dict': {
545cc85d 1376 'id': '3AKt1R1aDnw',
cf7e015f 1377 'ext': 'mp4',
545cc85d 1378 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1379 'description': 'md5:e03b909557865076822aa169218d6a5d',
1380 'duration': 10991,
1381 'upload_date': '20161111',
1382 'uploader': 'Team PGP',
1383 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1384 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1385 },
1386 }, {
1387 'info_dict': {
545cc85d 1388 'id': 'RtAMM00gpVc',
cf7e015f 1389 'ext': 'mp4',
545cc85d 1390 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1391 'description': 'md5:e03b909557865076822aa169218d6a5d',
1392 'duration': 10995,
1393 'upload_date': '20161111',
1394 'uploader': 'Team PGP',
1395 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1396 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1397 },
1398 }, {
1399 'info_dict': {
545cc85d 1400 'id': '6N2fdlP3C5U',
cf7e015f 1401 'ext': 'mp4',
545cc85d 1402 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1403 'description': 'md5:e03b909557865076822aa169218d6a5d',
1404 'duration': 10990,
1405 'upload_date': '20161111',
1406 'uploader': 'Team PGP',
1407 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1408 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1409 },
1410 }],
1411 'params': {
1412 'skip_download': True,
1413 },
cbaed4bb 1414 },
f9f49d87 1415 {
067aa17e 1416 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1417 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1418 'info_dict': {
1419 'id': 'gVfLd0zydlo',
1420 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1421 },
1422 'playlist_count': 2,
be49068d 1423 'skip': 'Not multifeed anymore',
f9f49d87 1424 },
cbaed4bb 1425 {
2d3d2997 1426 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1427 'only_matching': True,
0e49d9a6 1428 },
6d4fc66b 1429 {
2d3d2997 1430 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1431 'only_matching': True,
1432 },
0e49d9a6 1433 {
067aa17e 1434 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1435 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1436 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1437 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1438 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1439 'info_dict': {
1440 'id': 'lsguqyKfVQg',
1441 'ext': 'mp4',
1442 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1443 'alt_title': 'Dark Walk',
0e49d9a6 1444 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1445 'duration': 133,
0e49d9a6
LL
1446 'upload_date': '20151119',
1447 'uploader_id': 'IronSoulElf',
ec85ded8 1448 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1449 'uploader': 'IronSoulElf',
11f9be09 1450 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1451 'track': 'Dark Walk',
1452 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1453 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1454 },
1455 'params': {
1456 'skip_download': True,
1457 },
1458 },
61f92af1 1459 {
067aa17e 1460 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1461 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1462 'only_matching': True,
1463 },
313dfc45
LL
1464 {
1465 # Video with yt:stretch=17:0
1466 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1467 'info_dict': {
1468 'id': 'Q39EVAstoRM',
1469 'ext': 'mp4',
1470 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1471 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1472 'upload_date': '20151107',
1473 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1474 'uploader': 'CH GAMER DROID',
1475 },
1476 'params': {
1477 'skip_download': True,
1478 },
be49068d 1479 'skip': 'This video does not exist.',
313dfc45 1480 },
201c1459 1481 {
1482 # Video with incomplete 'yt:stretch=16:'
1483 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1484 'only_matching': True,
1485 },
7caf9830
S
1486 {
1487 # Video licensed under Creative Commons
1488 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1489 'info_dict': {
1490 'id': 'M4gD1WSo5mA',
1491 'ext': 'mp4',
1492 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1493 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1494 'duration': 721,
7caf9830
S
1495 'upload_date': '20150127',
1496 'uploader_id': 'BerkmanCenter',
ec85ded8 1497 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1498 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1499 'license': 'Creative Commons Attribution license (reuse allowed)',
1500 },
1501 'params': {
1502 'skip_download': True,
1503 },
1504 },
fd050249
S
1505 {
1506 # Channel-like uploader_url
1507 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1508 'info_dict': {
1509 'id': 'eQcmzGIKrzg',
1510 'ext': 'mp4',
1511 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1512 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1513 'duration': 4060,
fd050249 1514 'upload_date': '20151119',
eb6793ba 1515 'uploader': 'Bernie Sanders',
fd050249 1516 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1517 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1518 'license': 'Creative Commons Attribution license (reuse allowed)',
1519 },
1520 'params': {
1521 'skip_download': True,
1522 },
1523 },
040ac686
S
1524 {
1525 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1526 'only_matching': True,
7f29cf54
S
1527 },
1528 {
067aa17e 1529 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1530 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1531 'only_matching': True,
6496ccb4
S
1532 },
1533 {
1534 # Rental video preview
1535 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1536 'info_dict': {
1537 'id': 'uGpuVWrhIzE',
1538 'ext': 'mp4',
1539 'title': 'Piku - Trailer',
1540 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1541 'upload_date': '20150811',
1542 'uploader': 'FlixMatrix',
1543 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1544 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1545 'license': 'Standard YouTube License',
1546 },
1547 'params': {
1548 'skip_download': True,
1549 },
eb6793ba 1550 'skip': 'This video is not available.',
022a5d66 1551 },
12afdc2a
S
1552 {
1553 # YouTube Red video with episode data
1554 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1555 'info_dict': {
1556 'id': 'iqKdEhx-dD4',
1557 'ext': 'mp4',
1558 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1559 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1560 'duration': 2085,
12afdc2a
S
1561 'upload_date': '20170118',
1562 'uploader': 'Vsauce',
1563 'uploader_id': 'Vsauce',
1564 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1565 'series': 'Mind Field',
1566 'season_number': 1,
1567 'episode_number': 1,
1568 },
1569 'params': {
1570 'skip_download': True,
1571 },
1572 'expected_warnings': [
1573 'Skipping DASH manifest',
1574 ],
1575 },
c7121fa7
S
1576 {
1577 # The following content has been identified by the YouTube community
1578 # as inappropriate or offensive to some audiences.
1579 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1580 'info_dict': {
1581 'id': '6SJNVb0GnPI',
1582 'ext': 'mp4',
1583 'title': 'Race Differences in Intelligence',
1584 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1585 'duration': 965,
1586 'upload_date': '20140124',
1587 'uploader': 'New Century Foundation',
1588 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1589 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1590 },
1591 'params': {
1592 'skip_download': True,
1593 },
545cc85d 1594 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1595 },
022a5d66
S
1596 {
1597 # itag 212
1598 'url': '1t24XAntNCY',
1599 'only_matching': True,
fd5c4aab
S
1600 },
1601 {
1602 # geo restricted to JP
1603 'url': 'sJL6WA-aGkQ',
1604 'only_matching': True,
1605 },
cd5a74a2
S
1606 {
1607 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1608 'only_matching': True,
1609 },
bc2ca1bb 1610 {
1611 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1612 'only_matching': True,
1613 },
1614 {
1615 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1616 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1617 'only_matching': True,
1618 },
825cd268
RA
1619 {
1620 # DRM protected
1621 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1622 'only_matching': True,
4fe54c12
S
1623 },
1624 {
1625 # Video with unsupported adaptive stream type formats
1626 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1627 'info_dict': {
1628 'id': 'Z4Vy8R84T1U',
1629 'ext': 'mp4',
1630 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1631 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1632 'duration': 433,
1633 'upload_date': '20130923',
1634 'uploader': 'Amelia Putri Harwita',
1635 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1636 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1637 'formats': 'maxcount:10',
1638 },
1639 'params': {
1640 'skip_download': True,
1641 'youtube_include_dash_manifest': False,
1642 },
5429d6a9 1643 'skip': 'not actual anymore',
5caabd3c 1644 },
1645 {
822b9d9c 1646 # Youtube Music Auto-generated description
5caabd3c 1647 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1648 'info_dict': {
1649 'id': 'MgNrAu2pzNs',
1650 'ext': 'mp4',
1651 'title': 'Voyeur Girl',
1652 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1653 'upload_date': '20190312',
5429d6a9
S
1654 'uploader': 'Stephen - Topic',
1655 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1656 'artist': 'Stephen',
1657 'track': 'Voyeur Girl',
1658 'album': 'it\'s too much love to know my dear',
1659 'release_date': '20190313',
1660 'release_year': 2019,
1661 },
1662 'params': {
1663 'skip_download': True,
1664 },
1665 },
66b48727
RA
1666 {
1667 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1668 'only_matching': True,
1669 },
011e75e6
S
1670 {
1671 # invalid -> valid video id redirection
1672 'url': 'DJztXj2GPfl',
1673 'info_dict': {
1674 'id': 'DJztXj2GPfk',
1675 'ext': 'mp4',
1676 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1677 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1678 'upload_date': '20090125',
1679 'uploader': 'Prochorowka',
1680 'uploader_id': 'Prochorowka',
1681 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1682 'artist': 'Panjabi MC',
1683 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1684 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1685 },
1686 'params': {
1687 'skip_download': True,
1688 },
545cc85d 1689 'skip': 'Video unavailable',
ea74e00b
DP
1690 },
1691 {
1692 # empty description results in an empty string
1693 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1694 'info_dict': {
1695 'id': 'x41yOUIvK2k',
1696 'ext': 'mp4',
1697 'title': 'IMG 3456',
1698 'description': '',
1699 'upload_date': '20170613',
1700 'uploader_id': 'ElevageOrVert',
1701 'uploader': 'ElevageOrVert',
1702 },
1703 'params': {
1704 'skip_download': True,
1705 },
1706 },
a0566bbf 1707 {
29f7c58a 1708 # with '};' inside yt initial data (see [1])
1709 # see [2] for an example with '};' inside ytInitialPlayerResponse
1710 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1711 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1712 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1713 'info_dict': {
1714 'id': 'CHqg6qOn4no',
1715 'ext': 'mp4',
1716 'title': 'Part 77 Sort a list of simple types in c#',
1717 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1718 'upload_date': '20130831',
1719 'uploader_id': 'kudvenkat',
1720 'uploader': 'kudvenkat',
1721 },
1722 'params': {
1723 'skip_download': True,
1724 },
1725 },
29f7c58a 1726 {
1727 # another example of '};' in ytInitialData
1728 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1729 'only_matching': True,
1730 },
1731 {
1732 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1733 'only_matching': True,
1734 },
545cc85d 1735 {
cc2db878 1736 # https://github.com/ytdl-org/youtube-dl/pull/28094
1737 'url': 'OtqTfy26tG0',
1738 'info_dict': {
1739 'id': 'OtqTfy26tG0',
1740 'ext': 'mp4',
1741 'title': 'Burn Out',
1742 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1743 'upload_date': '20141120',
1744 'uploader': 'The Cinematic Orchestra - Topic',
1745 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1746 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1747 'artist': 'The Cinematic Orchestra',
1748 'track': 'Burn Out',
1749 'album': 'Every Day',
1750 'release_data': None,
1751 'release_year': None,
1752 },
1753 'params': {
1754 'skip_download': True,
1755 },
545cc85d 1756 },
bc2ca1bb 1757 {
1758 # controversial video, only works with bpctr when authenticated with cookies
1759 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1760 'only_matching': True,
1761 },
a1a7907b 1762 {
1763 # controversial video, requires bpctr/contentCheckOk
1764 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1765 'info_dict': {
1766 'id': 'SZJvDhaSDnc',
1767 'ext': 'mp4',
1768 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1769 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1770 'uploader': 'CBS This Morning',
11f9be09 1771 'uploader_id': 'CBSThisMorning',
a1a7907b 1772 'upload_date': '20140716',
1773 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1774 }
1775 },
f7ad7160 1776 {
1777 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1778 'url': 'cBvYw8_A0vQ',
1779 'info_dict': {
1780 'id': 'cBvYw8_A0vQ',
1781 'ext': 'mp4',
1782 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1783 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1784 'upload_date': '20201120',
1785 'uploader': 'Walk around Japan',
1786 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1787 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1788 },
1789 'params': {
1790 'skip_download': True,
1791 },
0fb983f6 1792 }, {
1793 # Has multiple audio streams
1794 'url': 'WaOKSUlf4TM',
1795 'only_matching': True
9297939e 1796 }, {
1797 # Requires Premium: has format 141 when requested using YTM url
1798 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1799 'only_matching': True
1800 }, {
120916da 1801 # multiple subtitles with same lang_code
1802 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1803 'only_matching': True,
109dd3b2 1804 }, {
1805 # Force use android client fallback
1806 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1807 'info_dict': {
1808 'id': 'YOelRv7fMxY',
11f9be09 1809 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1810 'ext': '3gp',
1811 'upload_date': '20210624',
1812 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1813 'uploader': 'colinfurze',
11f9be09 1814 'uploader_id': 'colinfurze',
109dd3b2 1815 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1816 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1817 },
1818 'params': {
1819 'format': '17', # 3gp format available on android
1820 'extractor_args': {'youtube': {'player_client': ['android']}},
1821 },
120916da 1822 },
109dd3b2 1823 {
1824 # Skip download of additional client configs (remix client config in this case)
1825 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1826 'only_matching': True,
1827 'params': {
1828 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1829 },
1830 }
2eb88d95
PH
1831 ]
1832
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected (False); every other URL is judged by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super(YoutubeIE, cls).suitable(url)
1842
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and its two per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS source
    # _player_cache: (player URL, signature shape) -> signature function
    self._code_cache, self._player_cache = {}, {}
e0df6211 1847
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return an absolute URL of the JS player, or None if none is found.

    The URL is taken from ``ytcfg['PLAYER_JS_URL']`` when available and
    otherwise searched for in *webpage*. Protocol-relative and
    site-relative URLs are made absolute against https://www.youtube.com.
    """
    url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if webpage and not url:
        url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not url:
        return None
    if url.startswith('//'):
        # protocol-relative URL
        return 'https:' + url
    if re.match(r'https?://', url):
        return url
    return compat_urlparse.urljoin('https://www.youtube.com', url)
1862
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature

    The representation is the length of every '.'-separated part of
    *example_sig*, joined by '.' again.
    """
    part_lengths = [compat_str(len(piece)) for piece in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1866
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id found in *player_url*.

    The patterns in ``cls._PLAYER_INFO_RE`` are tried in order and the
    ``id`` group of the first one that matches is returned.

    Raises ExtractorError when no pattern matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        found = re.search(pattern, player_url)
        if found:
            return found.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c 1876
109dd3b2 1877 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1878 player_id = self._extract_player_info(player_url)
1879 if player_id not in self._code_cache:
1880 self._code_cache[player_id] = self._download_webpage(
1881 player_url, video_id, fatal=fatal,
1882 note='Downloading player ' + player_id,
1883 errnote='Download of %s failed' % player_url)
1884 return player_id in self._code_cache
1885
e40c758c 1886 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1887 player_id = self._extract_player_info(player_url)
e0df6211 1888
c4417ddb 1889 # Read from filesystem cache
545cc85d 1890 func_id = 'js_%s_%s' % (
1891 player_id, self._signature_cache_id(example_sig))
c4417ddb 1892 assert os.path.basename(func_id) == func_id
a0e07d31 1893
69ea8ca4 1894 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1895 if cache_spec is not None:
78caa52a 1896 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1897
109dd3b2 1898 if self._load_player(video_id, player_url):
1899 code = self._code_cache[player_id]
1900 res = self._parse_sig_js(code)
e0df6211 1901
109dd3b2 1902 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1903 cache_res = res(test_string)
1904 cache_spec = [ord(c) for c in cache_res]
83799698 1905
109dd3b2 1906 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1907 return res
83799698 1908
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    *func* is run on a probe string of distinct characters; the resulting
    index permutation is rendered as a sum of ``s[...]`` index and slice
    expressions and written to the screen.
    """
    def render_slice(first, last, stride):
        lower = str(first) if first != 0 else ''
        bound = last + stride
        upper = (':%d' % bound) if bound >= 0 else ':'
        stride_part = (':%d' % stride) if stride != 1 else ''
        return 's[%s%s%s]' % (lower, upper, stride_part)

    def render_indices(indices):
        stride = None
        # Quelch pyflakes warnings - run_start will be set when stride is set
        run_start = '(Never used)'
        for cur, prev in zip(indices[1:], indices[:-1]):
            delta = cur - prev
            if stride is not None:
                # Inside a run: keep going while the stride holds,
                # otherwise emit the run collected so far.
                if delta != stride:
                    yield render_slice(run_start, prev, stride)
                    stride = None
            elif delta in (-1, 1):
                stride, run_start = delta, prev
            else:
                yield 's[%d]' % prev
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield render_slice(run_start, cur, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    permutation = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(render_indices(permutation))
    part_lengths = ', '.join(
        compat_str(len(piece)) for piece in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1947
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a Python callable wrapping the player's signature function.

    The function name is located in *jscode* with the patterns below (tried
    in order), the function is extracted with JSInterpreter, and the
    returned callable passes its single string argument on to it.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    js_function = JSInterpreter(jscode).extract_function(funcname)
    # The extracted function takes its arguments as a list.
    return lambda s: js_function([s])
1971
545cc85d 1972 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1973 """Turn the encrypted s field into a working signature"""
6b37f0be 1974
c8bf86d5 1975 if player_url is None:
69ea8ca4 1976 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1977
c8bf86d5 1978 try:
62af3a0e 1979 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1980 if player_id not in self._player_cache:
1981 func = self._extract_signature_function(
60064c53 1982 video_id, player_url, s
c8bf86d5
PH
1983 )
1984 self._player_cache[player_id] = func
1985 func = self._player_cache[player_id]
a06916d9 1986 if self.get_param('youtube_print_sig_code'):
60064c53 1987 self._print_sig_code(func, s)
c8bf86d5
PH
1988 return func(s)
1989 except Exception as e:
1990 tb = traceback.format_exc()
1991 raise ExtractorError(
78caa52a 1992 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1993
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ``ytcfg['STS']`` is preferred; otherwise the value is searched for in
    the player JS. Without a player URL this raises ExtractorError when
    *fatal* is set and otherwise warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    if not self._load_player(video_id, player_url, fatal=fatal):
        return sts

    player_code = self._code_cache[self._extract_player_info(player_url)]
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2018
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL of the video.

    The first ``videostatsPlaybackUrl`` found in *player_responses* is
    requested with ``ver`` and a random ``cpn`` added to its query. A
    warning is emitted, and nothing else is done, when no such URL exists.
    The request itself is non-fatal.
    """
    tracking_url = traverse_obj(
        player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not tracking_url:
        self.report_warning('Unable to mark watched')
        return
    url_parts = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    new_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2044
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return all YouTube embeds found in *webpage*, in discovery order.

    The list holds embed URLs from iframe/embed/object style markup,
    followed by the raw ``data-youtube-id`` values of lazyYT embeds and the
    ``data-video_id`` values of the Wordpress "YouTube Video Importer"
    plugin.
    """
    # Embedded YouTube player
    # NOTE(review): `embedSWF\(?:\s*` reads as "optional '(' then a literal
    # ':'"; it may have been meant as `embedSWF\(\s*` - confirm before changing.
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = [
        unescapeHTML(embed.group('url'))
        for embed in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(unescapeHTML(lazy_id) for lazy_id in lazy_ids)

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in importer_matches:
        # the video id is the last captured group
        entries.append(groups[-1])

    return entries
2076
@staticmethod
def _extract_url(webpage):
    """Return the first embed found by _extract_urls, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
2081
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. group 2 of ``cls._VALID_URL`` matched against *url*.

    Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
2089
def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar found in *data*.

    Start times are read from ``timeRangeStartMillis`` (scaled down by
    1000) and titles from the chapter renderer's ``simpleText`` title; the
    assembly itself is delegated to _extract_chapters.
    """
    def start_of(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_of,
        chapter_title=title_of,
        duration=duration)
2104
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build the chapter list from the macro-marker engagement panels in *data*.

    Each panel's marker list is tried in order; the first one that yields a
    non-empty chapter list wins. Returns [] when none does.
    """
    panel_contents = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def start_of(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def title_of(chapter):
        return self._get_text(chapter, 'title')

    for contents in panel_contents:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, start_of, title_of, duration)
        if chapters:
            return chapters
    return []
2120
2121 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2122 chapters = []
7c365c21 2123 last_chapter = {'start_time': 0}
2124 for idx, chapter in enumerate(chapter_list or []):
2125 title = chapter_title(chapter)
84213ea8
S
2126 start_time = chapter_time(chapter)
2127 if start_time is None:
2128 continue
7c365c21 2129 last_chapter['end_time'] = start_time
2130 if start_time < last_chapter['start_time']:
2131 if idx == 1:
2132 chapters.pop()
2133 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2134 else:
2135 self.report_warning(f'Invalid start time for chapter "{title}"')
2136 continue
2137 last_chapter = {'start_time': start_time, 'title': title}
2138 chapters.append(last_chapter)
2139 last_chapter['end_time'] = duration
84213ea8
S
2140 return chapters
2141
545cc85d 2142 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2143 return self._parse_json(self._search_regex(
2144 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2145 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 2146
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns what datetime_from_str gives for 'now-<X><units>', or None
    when the text has fewer than three space-separated words or
    datetime_from_str raises ValueError.
    """
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    amount, unit = words[0], words[1]
    try:
        return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
    except ValueError:
        return None
d92f5d5a 2159
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Build a comment dict from a single comment renderer.

    Returns None when the renderer has no ``commentId``. *parent* is the
    id of the comment being replied to; comments without a parent are
    reported with a parent of 'root'. ``timestamp`` is None when the
    published time text cannot be parsed.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    time_text_dt = self.parse_time_text(time_text)
    # Start from None: without this, a missing or unparsable time text left
    # `timestamp` unbound and building the result raised UnboundLocalError.
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    # The comment counts as favorited when its action buttons contain a 'creatorHeart' entry.
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2197
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """Generate comments (and their replies) by following continuation pages.

    Yields the comment dicts built by _extract_comment. While the header of
    a top-level request is processed, the estimated total number of
    comments (an int) is yielded as well if the header reports one.
    Replies are fetched by recursing with *parent* set to the id of the
    comment being replied to.

    *comment_counts* is a mutable list shared with the recursive calls:
    [comments so far, estimated total comments, current comment thread #].
    """

    def extract_header(contents):
        # Returns (estimated total comments, continuation for the chosen sort order).
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        # Yields every comment in `contents`, each followed by its replies.
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type here; it used to be passed inside the
            # getter tuple, where it acted as a second getter instead.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                yield from self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Keep the previous visitor data when the response carries none.
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # Count from 1 so that 0 really means "nothing received";
                # counting from 0 made a single entry look like an empty page.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items), 1):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                # Counted from 1 for the same reason as above.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {}), 1):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2368
2d6659b9 2369 @staticmethod
2370 def _generate_comment_continuation(video_id):
2371 """
2372 Generates initial comment section continuation token from given video id
2373 """
2374 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2375 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2376 new_continuation_intlist = list(itertools.chain.from_iterable(
2377 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2378 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2379
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Collects the comments produced by `_comment_entries` for every known
    comment section renderer found in `contents`, stopping once the
    `max_comments` extractor argument is reached or the user interrupts.
    Returns a dict with the collected `comments` and their `comment_count`.
    """
    known_entry_comment_renderers = ('itemSectionRenderer',)

    def _real_comment_extract(contents):
        # Anything but a list of renderer entries holds no comment section
        if not isinstance(contents, list):
            return
        for entry in contents:
            for key, renderer in entry.items():
                if key in known_entry_comment_renderers:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    # Only the first known renderer of an entry is processed
                    break

    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    # Force English regardless of account setting to prevent parsing issues
    # See: https://github.com/yt-dlp/yt-dlp/issues/532
    # A deep copy is used so that the caller's ytcfg is left untouched
    ytcfg = copy.deepcopy(ytcfg)
    client_context = traverse_obj(
        ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})
    client_context['hl'] = 'en'

    comments, estimated_total = [], 0
    try:
        for item in _real_comment_extract(contents):
            if len(comments) >= max_comments:
                break
            if isinstance(item, int):
                # Integers are the estimated total count, not comments
                estimated_total = item
            else:
                comments.append(item)
    except KeyboardInterrupt:
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))

    return {
        'comments': comments,
        'comment_count': len(comments),
    }
2417
109dd3b2 2418 @staticmethod
2419 def _generate_player_context(sts=None):
2420 context = {
2421 'html5Preference': 'HTML5_PREF_WANTS',
2422 }
2423 if sts is not None:
2424 context['signatureTimestamp'] = sts
2425 return {
2426 'playbackContext': {
2427 'contentPlaybackContext': context
a1a7907b 2428 },
2fd226f6 2429 'contentCheckOk': True,
2430 'racyCheckOk': True
109dd3b2 2431 }
2432
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
    """
    Requests the player API response of `video_id` as the given client

    The request is non-fatal; None is returned when no (or an empty)
    response is obtained.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    default_client = self._YT_CLIENTS[client]
    headers = self.generate_api_headers(
        player_ytcfg, identity_token, syncid,
        default_client=default_client, session_index=session_index)

    response = self._extract_response(
        item_id=video_id, ep='player',
        query={'videoId': video_id, **self._generate_player_context(sts)},
        ytcfg=player_ytcfg, headers=headers, fatal=False,
        default_client=default_client,
        note='Downloading %s player API JSON' % client.replace('_', ' ').strip())
    # Normalize any falsy response to None
    return response or None
2450
def _get_requested_clients(self, url, smuggled_data):
    """
    Determines the player clients to be used for extraction

    Clients are taken from the `player_client` extractor argument; `all`
    selects every public client (names not starting with `_`) and unknown
    names are skipped with a warning. Defaults to android and web.
    For music URLs, the `_music` variant of each client is requested as well.

    @returns  The requested clients, de-duplicated via orderedSet
    """
    allowed_clients = [client for client in self._YT_CLIENTS if client[:1] != '_']
    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = ['android', 'web']

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first instead of extending the list while it is
        # being iterated. Only variants that actually exist are added, since
        # every returned client is later looked up in _YT_CLIENTS
        music_clients = [
            f'{client}_music' for client in requested_clients
            if not client.endswith('_music') and f'{client}_music' in self._YT_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 2469
c0bc527b
M
2470 def _extract_player_ytcfg(self, client, video_id):
2471 url = {
2472 'web_music': 'https://music.youtube.com',
2473 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2474 }.get(client)
2475 if not url:
2476 return {}
2477 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2478 return self.extract_ytcfg(video_id, webpage) or {}
2479
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
    """
    Yields the player responses obtained for each of the given clients

    When a response reports an age-gate reason, the `_agegate` variant of
    that client is tried next, provided it exists and was not requested.
    """
    initial_pr = self._extract_yt_initial_variable(
        webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
        video_id, 'initial player response') if webpage else None

    original_clients = clients
    # Stack of clients left to try; reversed so that pop() follows the requested order
    pending = clients[::-1]
    while pending:
        client = pending.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        if client == 'web_embedded':
            # If we extracted the embed webpage, it'll tell us if we can view the video
            embedded_json = traverse_obj(
                player_ytcfg, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str)
            embedded_pr = self._parse_json(embedded_json or '{}', video_id=video_id)
            embedded_ps_reason = traverse_obj(
                embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
            if embedded_ps_reason in self._AGE_GATE_REASONS:
                self.report_warning(f'Youtube said: {embedded_ps_reason}')
                continue

        # The response embedded in the webpage stands in for the web client's API call
        if client == 'web' and initial_pr:
            pr = initial_pr
        else:
            pr = self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                identity_token, player_url, initial_pr)
        if pr:
            yield pr

        if traverse_obj(pr, ('playabilityStatus', 'reason')) in self._AGE_GATE_REASONS:
            agegate_client = f'{client}_agegate'
            if agegate_client in self._YT_CLIENTS and agegate_client not in original_clients:
                pending.append(agegate_client)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr and 'web' not in original_clients:
        initial_pr['streamingData'] = None
        yield initial_pr
2523
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """
    Yields the format dicts found in the given streaming data

    Formats listed directly in the streaming data are yielded first,
    followed by those of the HLS and DASH manifests, unless a format with
    the same itag was already seen.

    @param streaming_data  Iterable of the "streamingData" dicts of the player responses
    @param video_id        The video id
    @param player_url      URL of the player used for decrypting signatures;
                           formats that need decryption are skipped without it
    @param is_live         Whether the video is live; DASH manifests are not
                           extracted in that case
    """
    itags, stream_ids = [], []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        # Remember the quality so that manifest formats can be ranked later
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No direct URL; it has to be built from the signature cipher
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': ', '.join(filter(None, (
                audio_track.get('displayName'),
                # quality may still be None here when the format carries
                # neither a quality nor an audio quality
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', '')))),
            'fps': int_or_none(fmt.get('fps')),
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is that of the one stream
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    def guess_quality(f):
        # Use the quality seen for the same itag, else for the same height
        for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
            if val in qdict:
                return q(qdict[val])
        return -1

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag in itags:
                    continue
                if itag:
                    f['format_id'] = itag
                    itags.append(itag)
                f['quality'] = guess_quality(f)
                yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag:
                    itags.append(itag)
                f['quality'] = guess_quality(f)
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                yield f
2658
2659 def _real_extract(self, url):
2660 url, smuggled_data = unsmuggle_url(url, {})
2661 video_id = self._match_id(url)
2662
2663 base_url = self.http_scheme() + '//www.youtube.com/'
2664 webpage_url = base_url + 'watch?v=' + video_id
2665 webpage = self._download_webpage(
2666 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2667
2668 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2669 player_url = self._extract_player_url(master_ytcfg, webpage)
2670 identity_token = self._extract_identity_token(webpage, video_id)
2671
2672 player_responses = list(self._extract_player_responses(
2673 self._get_requested_clients(url, smuggled_data),
2674 video_id, webpage, master_ytcfg, player_url, identity_token))
2675
352d63fd 2676 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
11f9be09 2677
2678 playability_statuses = traverse_obj(
2679 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2680
2681 trailer_video_id = get_first(
2682 playability_statuses,
2683 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2684 expected_type=str)
2685 if trailer_video_id:
2686 return self.url_result(
2687 trailer_video_id, self.ie_key(), trailer_video_id)
2688
2689 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2690 if webpage else (lambda x: None))
2691
2692 video_details = traverse_obj(
2693 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2694 microformats = traverse_obj(
2695 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2696 expected_type=dict, default=[])
2697 video_title = (
2698 get_first(video_details, 'title')
2699 or self._get_text(microformats, (..., 'title'))
2700 or search_meta(['og:title', 'twitter:title', 'title']))
2701 video_description = get_first(video_details, 'shortDescription')
2702
2703 if not smuggled_data.get('force_singlefeed', False):
2704 if not self.get_param('noplaylist'):
2705 multifeed_metadata_list = get_first(
2706 player_responses,
2707 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2708 expected_type=str)
2709 if multifeed_metadata_list:
2710 entries = []
2711 feed_ids = []
2712 for feed in multifeed_metadata_list.split(','):
2713 # Unquote should take place before split on comma (,) since textual
2714 # fields may contain comma as well (see
2715 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2716 feed_data = compat_parse_qs(
2717 compat_urllib_parse_unquote_plus(feed))
2718
2719 def feed_entry(name):
2720 return try_get(
2721 feed_data, lambda x: x[name][0], compat_str)
2722
2723 feed_id = feed_entry('id')
2724 if not feed_id:
2725 continue
2726 feed_title = feed_entry('title')
2727 title = video_title
2728 if feed_title:
2729 title += ' (%s)' % feed_title
2730 entries.append({
2731 '_type': 'url_transparent',
2732 'ie_key': 'Youtube',
2733 'url': smuggle_url(
2734 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2735 {'force_singlefeed': True}),
2736 'title': title,
2737 })
2738 feed_ids.append(feed_id)
2739 self.to_screen(
2740 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2741 % (', '.join(feed_ids), video_id))
2742 return self.playlist_result(
2743 entries, video_id, video_title, video_description)
2744 else:
2745 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2746
7ea65411 2747 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
11f9be09 2748 is_live = get_first(video_details, 'isLive')
7ea65411 2749 if is_live is None:
2750 is_live = get_first(live_broadcast_details, 'isLiveNow')
11f9be09 2751
2752 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2753 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
bf1317d2 2754
545cc85d 2755 if not formats:
11f9be09 2756 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
b7da73eb 2757 self.raise_no_formats(
545cc85d 2758 'This video is DRM protected.', expected=True)
11f9be09 2759 pemr = get_first(
2760 playability_statuses,
2761 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2762 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2763 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 2764 if subreason:
545cc85d 2765 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 2766 countries = get_first(microformats, 'availableCountries')
545cc85d 2767 if not countries:
2768 regions_allowed = search_meta('regionsAllowed')
2769 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2770 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 2771 reason += f'. {subreason}'
545cc85d 2772 if reason:
b7da73eb 2773 self.raise_no_formats(reason, expected=True)
bf1317d2 2774
11f9be09 2775 for f in formats:
2a9c6dcd 2776 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
11f9be09 2777 f['source_preference'] = -10
2a9c6dcd 2778 note = f.get('format_note')
2779 f['format_note'] = f'{note} (throttled)' if note else '(throttled)'
11f9be09 2780
2a9c6dcd 2781 # Source is given priority since formats that throttle are given lower source_preference
2782 # When throttling issue is fully fixed, remove this
2783 self._sort_formats(formats, ('quality', 'height', 'fps', 'source'))
bf1317d2 2784
11f9be09 2785 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 2786 if not keywords and webpage:
2787 keywords = [
2788 unescapeHTML(m.group('content'))
2789 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2790 for keyword in keywords:
2791 if keyword.startswith('yt:stretch='):
201c1459 2792 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2793 if mobj:
2794 # NB: float is intentional for forcing float division
2795 w, h = (float(v) for v in mobj.groups())
2796 if w > 0 and h > 0:
2797 ratio = w / h
2798 for f in formats:
2799 if f.get('vcodec') != 'none':
2800 f['stretched_ratio'] = ratio
2801 break
6449cd80 2802
545cc85d 2803 thumbnails = []
11f9be09 2804 thumbnail_dicts = traverse_obj(
2805 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2806 expected_type=dict, default=[])
2807 for thumbnail in thumbnail_dicts:
2808 thumbnail_url = thumbnail.get('url')
2809 if not thumbnail_url:
2810 continue
2811 # Sometimes youtube gives a wrong thumbnail URL. See:
2812 # https://github.com/yt-dlp/yt-dlp/issues/233
2813 # https://github.com/ytdl-org/youtube-dl/issues/28023
2814 if 'maxresdefault' in thumbnail_url:
2815 thumbnail_url = thumbnail_url.split('?')[0]
2816 thumbnails.append({
2817 'url': thumbnail_url,
2818 'height': int_or_none(thumbnail.get('height')),
2819 'width': int_or_none(thumbnail.get('width')),
2820 })
ff2751ac 2821 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2822 if thumbnail_url:
2823 thumbnails.append({
2824 'url': thumbnail_url,
ff2751ac 2825 })
0ba692ac 2826 # The best resolution thumbnails sometimes does not appear in the webpage
2827 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 2828 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2829 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
245524e6 2830 # TODO: Test them also? - For some videos, even these don't exist
cca80fe6 2831 guaranteed_thumbnail_names = [
2832 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2833 'mqdefault', 'mq1', 'mq2', 'mq3',
2834 'default', '1', '2', '3'
2835 ]
2836 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2837 n_thumbnail_names = len(thumbnail_names)
2838
0ba692ac 2839 thumbnails.extend({
2840 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2841 video_id=video_id, name=name, ext=ext,
2842 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 2843 '_test_url': name in hq_thumbnail_names,
2844 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 2845 for thumb in thumbnails:
cca80fe6 2846 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 2847 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 2848 self._remove_duplicate_formats(thumbnails)
545cc85d 2849
7ea65411 2850 category = get_first(microformats, 'category') or search_meta('genre')
2851 channel_id = str_or_none(
2852 get_first(video_details, 'channelId')
2853 or get_first(microformats, 'externalChannelId')
2854 or search_meta('channelId'))
2855 duration = int_or_none(
2856 get_first(video_details, 'lengthSeconds')
2857 or get_first(microformats, 'lengthSeconds')
2858 or parse_duration(search_meta('duration'))) or None
2859 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2860
2861 live_content = get_first(video_details, 'isLiveContent')
2862 is_upcoming = get_first(video_details, 'isUpcoming')
2863 if is_live is None:
2864 if is_upcoming or live_content is False:
2865 is_live = False
2866 if is_upcoming is None and (live_content or is_live):
2867 is_upcoming = False
2868 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2869 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2870 if not duration and live_endtime and live_starttime:
2871 duration = live_endtime - live_starttime
2872
545cc85d 2873 info = {
2874 'id': video_id,
2875 'title': self._live_title(video_title) if is_live else video_title,
2876 'formats': formats,
2877 'thumbnails': thumbnails,
2878 'description': video_description,
2879 'upload_date': unified_strdate(
11f9be09 2880 get_first(microformats, 'uploadDate')
545cc85d 2881 or search_meta('uploadDate')),
11f9be09 2882 'uploader': get_first(video_details, 'author'),
545cc85d 2883 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2884 'uploader_url': owner_profile_url,
2885 'channel_id': channel_id,
11f9be09 2886 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
545cc85d 2887 'duration': duration,
2888 'view_count': int_or_none(
11f9be09 2889 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 2890 or search_meta('interactionCount')),
11f9be09 2891 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 2892 'age_limit': 18 if (
11f9be09 2893 get_first(microformats, 'isFamilySafe') is False
545cc85d 2894 or search_meta('isFamilyFriendly') == 'false'
2895 or search_meta('og:restrictions:age') == '18+') else 0,
2896 'webpage_url': webpage_url,
2897 'categories': [category] if category else None,
2898 'tags': keywords,
11f9be09 2899 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 2900 'is_live': is_live,
2901 'was_live': (False if is_live or is_upcoming or live_content is False
2902 else None if is_live is None or is_upcoming is None
2903 else live_content),
2904 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2905 'release_timestamp': live_starttime,
545cc85d 2906 }
b477fc13 2907
3944e7af 2908 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2909 # Converted into dicts to remove duplicates
2910 captions = {
2911 sub.get('baseUrl'): sub
2912 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2913 translation_languages = {
2914 lang.get('languageCode'): lang.get('languageName')
2915 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
545cc85d 2916 subtitles = {}
2917 if pctr:
774d79cc 2918 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2919 lang_subs = container.setdefault(lang_code, [])
545cc85d 2920 for fmt in self._SUBTITLE_FORMATS:
2921 query.update({
2922 'fmt': fmt,
2923 })
2924 lang_subs.append({
2925 'ext': fmt,
2926 'url': update_url_query(base_url, query),
774d79cc 2927 'name': sub_name,
545cc85d 2928 })
7e72694b 2929
3944e7af 2930 for base_url, caption_track in captions.items():
545cc85d 2931 if not base_url:
2932 continue
2933 if caption_track.get('kind') != 'asr':
120916da 2934 lang_code = (
2935 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2936 or caption_track.get('languageCode'))
545cc85d 2937 if not lang_code:
2938 continue
2939 process_language(
774d79cc 2940 subtitles, base_url, lang_code,
3944e7af 2941 traverse_obj(caption_track, ('name', 'simpleText')),
774d79cc 2942 {})
545cc85d 2943 continue
2944 automatic_captions = {}
3944e7af 2945 for trans_code, trans_name in translation_languages.items():
2946 if not trans_code:
545cc85d 2947 continue
2948 process_language(
3944e7af 2949 automatic_captions, base_url, trans_code,
2950 self._get_text(trans_name, max_runs=1),
2951 {'tlang': trans_code})
545cc85d 2952 info['automatic_captions'] = automatic_captions
2953 info['subtitles'] = subtitles
7e72694b 2954
545cc85d 2955 parsed_url = compat_urllib_parse_urlparse(url)
2956 for component in [parsed_url.fragment, parsed_url.query]:
2957 query = compat_parse_qs(component)
2958 for k, v in query.items():
2959 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2960 d_k += '_time'
2961 if d_k not in info and k in s_ks:
2962 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2963
2964 # Youtube Music Auto-generated description
822b9d9c 2965 if video_description:
38d70284 2966 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2967 if mobj:
822b9d9c
RA
2968 release_year = mobj.group('release_year')
2969 release_date = mobj.group('release_date')
2970 if release_date:
2971 release_date = release_date.replace('-', '')
2972 if not release_year:
545cc85d 2973 release_year = release_date[:4]
2974 info.update({
2975 'album': mobj.group('album'.strip()),
2976 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2977 'track': mobj.group('track').strip(),
2978 'release_date': release_date,
cc2db878 2979 'release_year': int_or_none(release_year),
545cc85d 2980 })
7e72694b 2981
545cc85d 2982 initial_data = None
2983 if webpage:
2984 initial_data = self._extract_yt_initial_variable(
2985 webpage, self._YT_INITIAL_DATA_RE, video_id,
2986 'yt initial data')
2987 if not initial_data:
11f9be09 2988 headers = self.generate_api_headers(
2989 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
2990 session_index=self._extract_session_index(master_ytcfg))
2991
109dd3b2 2992 initial_data = self._extract_response(
2993 item_id=video_id, ep='next', fatal=False,
11f9be09 2994 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
109dd3b2 2995 note='Downloading initial data API JSON')
545cc85d 2996
c60ee3a2 2997 try:
2998 # This will error if there is no livechat
2999 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3000 info['subtitles']['live_chat'] = [{
3001 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3002 'video_id': video_id,
3003 'ext': 'json',
f6745c49 3004 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 3005 }]
3006 except (KeyError, IndexError, TypeError):
3007 pass
545cc85d 3008
3009 if initial_data:
7c365c21 3010 info['chapters'] = (
3011 self._extract_chapters_from_json(initial_data, duration)
3012 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3013 or None)
545cc85d 3014
3015 contents = try_get(
3016 initial_data,
3017 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3018 list) or []
3019 for content in contents:
3020 vpir = content.get('videoPrimaryInfoRenderer')
3021 if vpir:
3022 stl = vpir.get('superTitleLink')
3023 if stl:
fe93e2c4 3024 stl = self._get_text(stl)
545cc85d 3025 if try_get(
3026 vpir,
3027 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3028 info['location'] = stl
3029 else:
3030 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3031 if mobj:
3032 info.update({
3033 'series': mobj.group(1),
3034 'season_number': int(mobj.group(2)),
3035 'episode_number': int(mobj.group(3)),
3036 })
3037 for tlb in (try_get(
3038 vpir,
3039 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3040 list) or []):
3041 tbr = tlb.get('toggleButtonRenderer') or {}
3042 for getter, regex in [(
3043 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3044 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3045 lambda x: x['accessibility'],
3046 lambda x: x['accessibilityData']['accessibilityData'],
3047 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3048 label = (try_get(tbr, getter, dict) or {}).get('label')
3049 if label:
3050 mobj = re.match(regex, label)
3051 if mobj:
3052 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3053 break
3054 sbr_tooltip = try_get(
3055 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3056 if sbr_tooltip:
3057 like_count, dislike_count = sbr_tooltip.split(' / ')
3058 info.update({
3059 'like_count': str_to_int(like_count),
3060 'dislike_count': str_to_int(dislike_count),
3061 })
3062 vsir = content.get('videoSecondaryInfoRenderer')
3063 if vsir:
052e1350 3064 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
545cc85d 3065 rows = try_get(
3066 vsir,
3067 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3068 list) or []
3069 multiple_songs = False
3070 for row in rows:
3071 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3072 multiple_songs = True
3073 break
3074 for row in rows:
3075 mrr = row.get('metadataRowRenderer') or {}
3076 mrr_title = mrr.get('title')
3077 if not mrr_title:
3078 continue
052e1350 3079 mrr_title = self._get_text(mrr, 'title')
3080 mrr_contents_text = self._get_text(mrr, ('contents', 0))
545cc85d 3081 if mrr_title == 'License':
3082 info['license'] = mrr_contents_text
3083 elif not multiple_songs:
3084 if mrr_title == 'Album':
3085 info['album'] = mrr_contents_text
3086 elif mrr_title == 'Artist':
3087 info['artist'] = mrr_contents_text
3088 elif mrr_title == 'Song':
3089 info['track'] = mrr_contents_text
3090
3091 fallbacks = {
3092 'channel': 'uploader',
3093 'channel_id': 'uploader_id',
3094 'channel_url': 'uploader_url',
3095 }
3096 for to, frm in fallbacks.items():
3097 if not info.get(to):
3098 info[to] = info.get(frm)
3099
3100 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3101 v = info.get(s_k)
3102 if v:
3103 info[d_k] = v
b84071c0 3104
11f9be09 3105 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3106 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3107 is_membersonly = None
b28f8d24 3108 is_premium = None
c224251a
M
3109 if initial_data and is_private is not None:
3110 is_membersonly = False
b28f8d24 3111 is_premium = False
47193e02 3112 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3113 badge_labels = set()
3114 for content in contents:
3115 if not isinstance(content, dict):
3116 continue
3117 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3118 for badge_label in badge_labels:
3119 if badge_label.lower() == 'members only':
3120 is_membersonly = True
3121 elif badge_label.lower() == 'premium':
3122 is_premium = True
3123 elif badge_label.lower() == 'unlisted':
3124 is_unlisted = True
c224251a 3125
c224251a
M
3126 info['availability'] = self._availability(
3127 is_private=is_private,
b28f8d24 3128 needs_premium=is_premium,
c224251a
M
3129 needs_subscription=is_membersonly,
3130 needs_auth=info['age_limit'] >= 18,
3131 is_unlisted=None if is_private is None else is_unlisted)
3132
06167fbb 3133 # get xsrf for annotations or comments
a06916d9 3134 get_annotations = self.get_param('writeannotations', False)
3135 get_comments = self.get_param('getcomments', False)
06167fbb 3136 if get_annotations or get_comments:
29f7c58a 3137 xsrf_token = None
11f9be09 3138 if master_ytcfg:
3139 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
29f7c58a 3140 if not xsrf_token:
3141 xsrf_token = self._search_regex(
3142 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 3143 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 3144
3145 # annotations
06167fbb 3146 if get_annotations:
11f9be09 3147 invideo_url = get_first(
3148 player_responses,
3149 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3150 expected_type=str)
64b6a4e9 3151 if xsrf_token and invideo_url:
29f7c58a 3152 xsrf_field_name = None
11f9be09 3153 if master_ytcfg:
3154 xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
29f7c58a 3155 if not xsrf_field_name:
3156 xsrf_field_name = self._search_regex(
3157 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 3158 webpage, 'xsrf field name',
29f7c58a 3159 group='xsrf_field_name', default='session_token')
8a784c74 3160 info['annotations'] = self._download_webpage(
64b6a4e9
RA
3161 self._proto_relative_url(invideo_url),
3162 video_id, note='Downloading annotations',
3163 errnote='Unable to download video annotations', fatal=False,
3164 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 3165
277d6ff5 3166 if get_comments:
11f9be09 3167 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3168
11f9be09 3169 self.mark_watched(video_id, player_responses)
d77ab8e2 3170
545cc85d 3171 return info
c5e8d7af 3172
5f6a1245 3173
8bdd16b4 3174class YoutubeTabIE(YoutubeBaseInfoExtractor):
3175 IE_DESC = 'YouTube.com tab'
70d5c17b 3176 _VALID_URL = r'''(?x)
3177 https?://
3178 (?:\w+\.)?
3179 (?:
3180 youtube(?:kids)?\.com|
3181 invidio\.us
3182 )/
3183 (?:
fe03a6cd 3184 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3185 (?P<not_channel>
9ba5705a 3186 feed/|hashtag/|
70d5c17b 3187 (?:playlist|watch)\?.*?\blist=
3188 )|
29f7c58a 3189 (?!(?:%s)\b) # Direct URLs
70d5c17b 3190 )
3191 (?P<id>[^/?\#&]+)
3192 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3193 IE_NAME = 'youtube:tab'
3194
81127aa5 3195 _TESTS = [{
da692b79 3196 'note': 'playlists, multipage',
8bdd16b4 3197 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3198 'playlist_mincount': 94,
3199 'info_dict': {
3200 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3201 'title': 'Игорь Клейнер - Playlists',
3202 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3203 'uploader': 'Игорь Клейнер',
3204 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3205 },
3206 }, {
da692b79 3207 'note': 'playlists, multipage, different order',
8bdd16b4 3208 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3209 'playlist_mincount': 94,
3210 'info_dict': {
3211 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3212 'title': 'Игорь Клейнер - Playlists',
3213 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3214 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3215 'uploader': 'Игорь Клейнер',
8bdd16b4 3216 },
201c1459 3217 }, {
da692b79 3218 'note': 'playlists, series',
201c1459 3219 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3220 'playlist_mincount': 5,
3221 'info_dict': {
3222 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3223 'title': '3Blue1Brown - Playlists',
3224 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3225 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3226 'uploader': '3Blue1Brown',
201c1459 3227 },
8bdd16b4 3228 }, {
da692b79 3229 'note': 'playlists, singlepage',
8bdd16b4 3230 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3231 'playlist_mincount': 4,
3232 'info_dict': {
3233 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3234 'title': 'ThirstForScience - Playlists',
3235 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3236 'uploader': 'ThirstForScience',
3237 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3238 }
3239 }, {
3240 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3241 'only_matching': True,
3242 }, {
da692b79 3243 'note': 'basic, single video playlist',
0e30a7b9 3244 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3245 'info_dict': {
0e30a7b9 3246 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3247 'uploader': 'Sergey M.',
3248 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3249 'title': 'youtube-dl public playlist',
81127aa5 3250 },
0e30a7b9 3251 'playlist_count': 1,
9291475f 3252 }, {
da692b79 3253 'note': 'empty playlist',
0e30a7b9 3254 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3255 'info_dict': {
0e30a7b9 3256 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3257 'uploader': 'Sergey M.',
3258 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3259 'title': 'youtube-dl empty playlist',
9291475f
PH
3260 },
3261 'playlist_count': 0,
3262 }, {
da692b79 3263 'note': 'Home tab',
8bdd16b4 3264 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3265 'info_dict': {
8bdd16b4 3266 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3267 'title': 'lex will - Home',
3268 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3269 'uploader': 'lex will',
3270 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3271 },
8bdd16b4 3272 'playlist_mincount': 2,
9291475f 3273 }, {
da692b79 3274 'note': 'Videos tab',
8bdd16b4 3275 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3276 'info_dict': {
8bdd16b4 3277 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3278 'title': 'lex will - Videos',
3279 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3280 'uploader': 'lex will',
3281 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3282 },
8bdd16b4 3283 'playlist_mincount': 975,
9291475f 3284 }, {
da692b79 3285 'note': 'Videos tab, sorted by popular',
8bdd16b4 3286 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3287 'info_dict': {
8bdd16b4 3288 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3289 'title': 'lex will - Videos',
3290 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3291 'uploader': 'lex will',
3292 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3293 },
8bdd16b4 3294 'playlist_mincount': 199,
9291475f 3295 }, {
da692b79 3296 'note': 'Playlists tab',
8bdd16b4 3297 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3298 'info_dict': {
8bdd16b4 3299 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3300 'title': 'lex will - Playlists',
3301 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3302 'uploader': 'lex will',
3303 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3304 },
8bdd16b4 3305 'playlist_mincount': 17,
ac7553d0 3306 }, {
da692b79 3307 'note': 'Community tab',
8bdd16b4 3308 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3309 'info_dict': {
8bdd16b4 3310 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3311 'title': 'lex will - Community',
3312 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3313 'uploader': 'lex will',
3314 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3315 },
3316 'playlist_mincount': 18,
87dadd45 3317 }, {
da692b79 3318 'note': 'Channels tab',
8bdd16b4 3319 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3320 'info_dict': {
8bdd16b4 3321 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3322 'title': 'lex will - Channels',
3323 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3324 'uploader': 'lex will',
3325 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3326 },
deaec5af 3327 'playlist_mincount': 12,
cd684175 3328 }, {
3329 'note': 'Search tab',
3330 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3331 'playlist_mincount': 40,
3332 'info_dict': {
3333 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3334 'title': '3Blue1Brown - Search - linear algebra',
3335 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3336 'uploader': '3Blue1Brown',
3337 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3338 },
6b08cdf6 3339 }, {
a0566bbf 3340 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3341 'only_matching': True,
3342 }, {
a0566bbf 3343 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3344 'only_matching': True,
3345 }, {
a0566bbf 3346 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3347 'only_matching': True,
3348 }, {
3349 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3350 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3351 'info_dict': {
3352 'title': '29C3: Not my department',
3353 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3354 'uploader': 'Christiaan008',
3355 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3356 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3357 },
3358 'playlist_count': 96,
3359 }, {
3360 'note': 'Large playlist',
3361 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3362 'info_dict': {
8bdd16b4 3363 'title': 'Uploads from Cauchemar',
3364 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3365 'uploader': 'Cauchemar',
3366 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3367 },
8bdd16b4 3368 'playlist_mincount': 1123,
3369 }, {
da692b79 3370 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3371 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3372 'only_matching': True,
4b7df0d3
JMF
3373 }, {
3374 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3375 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3376 'info_dict': {
acf757f4
PH
3377 'title': 'Uploads from Interstellar Movie',
3378 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3379 'uploader': 'Interstellar Movie',
8bdd16b4 3380 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3381 },
481cc733 3382 'playlist_mincount': 21,
358de58c 3383 }, {
3384 'note': 'Playlist with "show unavailable videos" button',
3385 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3386 'info_dict': {
3387 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3388 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3389 'uploader': 'Phim Siêu Nhân Nhật Bản',
3390 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3391 },
da692b79 3392 'playlist_mincount': 200,
5d342002 3393 }, {
da692b79 3394 'note': 'Playlist with unavailable videos in page 7',
5d342002 3395 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3396 'info_dict': {
3397 'title': 'Uploads from BlankTV',
3398 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3399 'uploader': 'BlankTV',
3400 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3401 },
da692b79 3402 'playlist_mincount': 1000,
8bdd16b4 3403 }, {
da692b79 3404 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3405 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3406 'info_dict': {
3407 'title': 'Data Analysis with Dr Mike Pound',
3408 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3409 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3410 'uploader': 'Computerphile',
deaec5af 3411 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3412 },
3413 'playlist_mincount': 11,
3414 }, {
a0566bbf 3415 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3416 'only_matching': True,
dacb3a86 3417 }, {
da692b79 3418 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3419 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3420 'info_dict': {
3421 'id': 'FqZTN594JQw',
3422 'ext': 'webm',
3423 'title': "Smiley's People 01 detective, Adventure Series, Action",
3424 'uploader': 'STREEM',
3425 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3426 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3427 'upload_date': '20150526',
3428 'license': 'Standard YouTube License',
3429 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3430 'categories': ['People & Blogs'],
3431 'tags': list,
dbdaaa23 3432 'view_count': int,
dacb3a86
S
3433 'like_count': int,
3434 'dislike_count': int,
3435 },
3436 'params': {
3437 'skip_download': True,
3438 },
13a75688 3439 'skip': 'This video is not available.',
dacb3a86 3440 'add_ie': [YoutubeIE.ie_key()],
481cc733 3441 }, {
8bdd16b4 3442 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3443 'only_matching': True,
66b48727 3444 }, {
8bdd16b4 3445 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3446 'only_matching': True,
a0566bbf 3447 }, {
3448 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3449 'info_dict': {
11f9be09 3450 'id': 'FMtPN8yp5LU', # This will keep changing
a0566bbf 3451 'ext': 'mp4',
deaec5af 3452 'title': compat_str,
a0566bbf 3453 'uploader': 'Sky News',
3454 'uploader_id': 'skynews',
3455 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3456 'upload_date': r're:\d{8}',
3457 'description': compat_str,
a0566bbf 3458 'categories': ['News & Politics'],
3459 'tags': list,
3460 'like_count': int,
3461 'dislike_count': int,
3462 },
3463 'params': {
3464 'skip_download': True,
3465 },
da692b79 3466 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3467 }, {
3468 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3469 'info_dict': {
3470 'id': 'a48o2S1cPoo',
3471 'ext': 'mp4',
3472 'title': 'The Young Turks - Live Main Show',
3473 'uploader': 'The Young Turks',
3474 'uploader_id': 'TheYoungTurks',
3475 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3476 'upload_date': '20150715',
3477 'license': 'Standard YouTube License',
3478 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3479 'categories': ['News & Politics'],
3480 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3481 'like_count': int,
3482 'dislike_count': int,
3483 },
3484 'params': {
3485 'skip_download': True,
3486 },
3487 'only_matching': True,
3488 }, {
3489 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3490 'only_matching': True,
3491 }, {
3492 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3493 'only_matching': True,
09f1580e 3494 }, {
3495 'note': 'A channel that is not live. Should raise error',
3496 'url': 'https://www.youtube.com/user/numberphile/live',
3497 'only_matching': True,
3d3dddc9 3498 }, {
3499 'url': 'https://www.youtube.com/feed/trending',
3500 'only_matching': True,
3501 }, {
3d3dddc9 3502 'url': 'https://www.youtube.com/feed/library',
3503 'only_matching': True,
3504 }, {
3d3dddc9 3505 'url': 'https://www.youtube.com/feed/history',
3506 'only_matching': True,
3507 }, {
3d3dddc9 3508 'url': 'https://www.youtube.com/feed/subscriptions',
3509 'only_matching': True,
3510 }, {
3d3dddc9 3511 'url': 'https://www.youtube.com/feed/watch_later',
3512 'only_matching': True,
3513 }, {
da692b79 3514 'note': 'Recommended - redirects to home page',
3d3dddc9 3515 'url': 'https://www.youtube.com/feed/recommended',
3516 'only_matching': True,
29f7c58a 3517 }, {
da692b79 3518 'note': 'inline playlist with not always working continuations',
29f7c58a 3519 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3520 'only_matching': True,
3521 }, {
3522 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3523 'only_matching': True,
3524 }, {
3525 'url': 'https://www.youtube.com/course',
3526 'only_matching': True,
3527 }, {
3528 'url': 'https://www.youtube.com/zsecurity',
3529 'only_matching': True,
3530 }, {
3531 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3532 'only_matching': True,
3533 }, {
3534 'url': 'https://www.youtube.com/TheYoungTurks/live',
3535 'only_matching': True,
39ed931e 3536 }, {
3537 'url': 'https://www.youtube.com/hashtag/cctv9',
3538 'info_dict': {
3539 'id': 'cctv9',
3540 'title': '#cctv9',
3541 },
3542 'playlist_mincount': 350,
201c1459 3543 }, {
3544 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3545 'only_matching': True,
9297939e 3546 }, {
da692b79 3547 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3548 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3549 'only_matching': True
fe03a6cd 3550 }, {
3551 'note': '/browse/ should redirect to /channel/',
3552 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3553 'only_matching': True
3554 }, {
3555 'note': 'VLPL, should redirect to playlist?list=PL...',
3556 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3557 'info_dict': {
3558 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3559 'uploader': 'NoCopyrightSounds',
3560 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3561 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3562 'title': 'NCS Releases',
3563 },
3564 'playlist_mincount': 166,
18db7548 3565 }, {
3566 'note': 'Topic, should redirect to playlist?list=UU...',
3567 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3568 'info_dict': {
3569 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3570 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3571 'title': 'Uploads from Royalty Free Music - Topic',
3572 'uploader': 'Royalty Free Music - Topic',
3573 },
3574 'expected_warnings': [
3575 'A channel/user page was given',
3576 'The URL does not have a videos tab',
3577 ],
3578 'playlist_mincount': 101,
3579 }, {
3580 'note': 'Topic without a UU playlist',
3581 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3582 'info_dict': {
3583 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3584 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3585 },
3586 'expected_warnings': [
3587 'A channel/user page was given',
3588 'The URL does not have a videos tab',
3589 'Falling back to channel URL',
3590 ],
3591 'playlist_mincount': 9,
abcdd12b 3592 }, {
3593 'note': 'Youtube music Album',
3594 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3595 'info_dict': {
3596 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3597 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3598 },
3599 'playlist_count': 50,
47193e02 3600 }, {
3601 'note': 'unlisted single video playlist',
3602 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3603 'info_dict': {
3604 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3605 'uploader': 'colethedj',
3606 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3607 'title': 'yt-dlp unlisted playlist test',
3608 'availability': 'unlisted'
3609 },
3610 'playlist_count': 1,
29f7c58a 3611 }]
3612
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL accepted by ``YoutubeIE`` is rejected here; every other URL is
    decided by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3617
3618 def _extract_channel_id(self, webpage):
3619 channel_id = self._html_search_meta(
3620 'channelId', webpage, 'channel id', default=None)
3621 if channel_id:
3622 return channel_id
3623 channel_url = self._html_search_meta(
3624 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3625 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3626 'twitter:app:url:googleplay'), webpage, 'channel url')
3627 return self._search_regex(
3628 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3629 channel_url, 'channel id')
15f6397c 3630
8bdd16b4 3631 @staticmethod
cd7c66cf 3632 def _extract_basic_item_renderer(item):
3633 # Modified from _extract_grid_item_renderer
201c1459 3634 known_basic_renderers = (
3635 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3636 )
3637 for key, renderer in item.items():
201c1459 3638 if not isinstance(renderer, dict):
cd7c66cf 3639 continue
201c1459 3640 elif key in known_basic_renderers:
3641 return renderer
3642 elif key.startswith('grid') and key.endswith('Renderer'):
3643 return renderer
8bdd16b4 3644
8bdd16b4 3645 def _grid_entries(self, grid_renderer):
3646 for item in grid_renderer['items']:
3647 if not isinstance(item, dict):
39b62db1 3648 continue
cd7c66cf 3649 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3650 if not isinstance(renderer, dict):
3651 continue
052e1350 3652 title = self._get_text(renderer, 'title')
fe93e2c4 3653
8bdd16b4 3654 # playlist
3655 playlist_id = renderer.get('playlistId')
3656 if playlist_id:
3657 yield self.url_result(
3658 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3659 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3660 video_title=title)
201c1459 3661 continue
8bdd16b4 3662 # video
3663 video_id = renderer.get('videoId')
3664 if video_id:
3665 yield self._extract_video(renderer)
201c1459 3666 continue
8bdd16b4 3667 # channel
3668 channel_id = renderer.get('channelId')
3669 if channel_id:
8bdd16b4 3670 yield self.url_result(
3671 'https://www.youtube.com/channel/%s' % channel_id,
3672 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3673 continue
3674 # generic endpoint URL support
3675 ep_url = urljoin('https://www.youtube.com/', try_get(
3676 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3677 compat_str))
3678 if ep_url:
3679 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3680 if ie.suitable(ep_url):
3681 yield self.url_result(
3682 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3683 break
8bdd16b4 3684
3d3dddc9 3685 def _shelf_entries_from_content(self, shelf_renderer):
3686 content = shelf_renderer.get('content')
3687 if not isinstance(content, dict):
8bdd16b4 3688 return
cd7c66cf 3689 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3690 if renderer:
3691 # TODO: add support for nested playlists so each shelf is processed
3692 # as separate playlist
3693 # TODO: this includes only first N items
3694 for entry in self._grid_entries(renderer):
3695 yield entry
3696 renderer = content.get('horizontalListRenderer')
3697 if renderer:
3698 # TODO
3699 pass
8bdd16b4 3700
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries represented by a shelf renderer.

    When the shelf carries an endpoint URL, a URL result for it is yielded
    first, unless *skip_channels* is set and the URL contains
    ``/channels?``, in which case nothing at all is yielded.  Entries
    taken from the shelf content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels.  Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = skip_channels and '/channels?' in shelf_url
        if links_to_channels:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may not contain a shelf URL; fall back to extraction from content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 3717
8bdd16b4 3718 def _playlist_entries(self, video_list_renderer):
3719 for content in video_list_renderer['contents']:
3720 if not isinstance(content, dict):
3721 continue
3722 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3723 if not isinstance(renderer, dict):
3724 continue
3725 video_id = renderer.get('videoId')
3726 if not video_id:
3727 continue
3728 yield self._extract_video(renderer)
07aeced6 3729
def _rich_entries(self, rich_grid_renderer):
    """Yield the video found at ``content.videoRenderer``, if it has a ``videoId``.

    At most one entry is produced; nothing is yielded when the video
    renderer is missing, is not a dict, or carries no ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3737
8bdd16b4 3738 def _video_entry(self, video_renderer):
3739 video_id = video_renderer.get('videoId')
3740 if video_id:
3741 return self._extract_video(video_renderer)
dacb3a86 3742
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a single backstage post.

    In order: the attached video (if any), the attached playlist (if any),
    then every link in the post text that ``YoutubeIE`` accepts, leaving
    out links pointing at the already-yielded attached video.  Nothing is
    yielded when the thread has no ``backstagePostRenderer``.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        linked_video_id = YoutubeIE._match_id(link_url)
        # The attached video has already been yielded above
        if linked_video_id != attached_video_id:
            yield self.url_result(
                link_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 3778
8bdd16b4 3779 def _post_thread_continuation_entries(self, post_thread_continuation):
3780 contents = post_thread_continuation.get('contents')
3781 if not isinstance(contents, list):
3782 return
3783 for content in contents:
3784 renderer = content.get('backstagePostThreadRenderer')
3785 if not isinstance(renderer, dict):
3786 continue
3787 for entry in self._post_thread_entries(renderer):
3788 yield entry
07aeced6 3789
39ed931e 3790 r''' # unused
3791 def _rich_grid_entries(self, contents):
3792 for content in contents:
3793 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3794 if video_renderer:
3795 entry = self._video_entry(video_renderer)
3796 if entry:
3797 yield entry
3798 '''
f4f751af 3799 def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
3462ffa8 3800
70d5c17b 3801 def extract_entries(parent_renderer): # this needs to called again for continuation to work with feeds
3802 contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3803 for content in contents:
3804 if not isinstance(content, dict):
8bdd16b4 3805 continue
70d5c17b 3806 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3462ffa8 3807 if not is_renderer:
70d5c17b 3808 renderer = content.get('richItemRenderer')
3462ffa8 3809 if renderer:
3810 for entry in self._rich_entries(renderer):
3811 yield entry
3812 continuation_list[0] = self._extract_continuation(parent_renderer)
8bdd16b4 3813 continue
3462ffa8 3814 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3815 for isr_content in isr_contents:
3816 if not isinstance(isr_content, dict):
3817 continue
69184e41 3818
3819 known_renderers = {
3820 'playlistVideoListRenderer': self._playlist_entries,
3821 'gridRenderer': self._grid_entries,
3822 'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
3823 'backstagePostThreadRenderer': self._post_thread_entries,
3824 'videoRenderer': lambda x: [self._video_entry(x)],
3825 }
3826 for key, renderer in isr_content.items():
3827 if key not in known_renderers:
3828 continue
3829 for entry in known_renderers[key](renderer):
3830 if entry:
3831 yield entry
3462ffa8 3832 continuation_list[0] = self._extract_continuation(renderer)
69184e41 3833 break
70d5c17b 3834
3462ffa8 3835 if not continuation_list[0]:
3836 continuation_list[0] = self._extract_continuation(is_renderer)
70d5c17b 3837
3838 if not continuation_list[0]:
3839 continuation_list[0] = self._extract_continuation(parent_renderer)
3462ffa8 3840
3841 continuation_list = [None] # Python 2 doesnot support nonlocal
29f7c58a 3842 tab_content = try_get(tab, lambda x: x['content'], dict)
3843 if not tab_content:
3844 return
3462ffa8 3845 parent_renderer = (
29f7c58a 3846 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3847 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
70d5c17b 3848 for entry in extract_entries(parent_renderer):
3849 yield entry
3462ffa8 3850 continuation = continuation_list[0]
fe93e2c4 3851 visitor_data = None
d069eca7 3852
8bdd16b4 3853 for page_num in itertools.count(1):
3854 if not continuation:
3855 break
11f9be09 3856 headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
79360d99 3857 response = self._extract_response(
3858 item_id='%s page %s' % (item_id, page_num),
fe93e2c4 3859 query=continuation, headers=headers, ytcfg=ytcfg,
79360d99 3860 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
a5c56234
M
3861
3862 if not response:
8bdd16b4 3863 break
f4f751af 3864 visitor_data = try_get(
3865 response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data
ebf1b291 3866
69184e41 3867 known_continuation_renderers = {
3868 'playlistVideoListContinuation': self._playlist_entries,
3869 'gridContinuation': self._grid_entries,
3870 'itemSectionContinuation': self._post_thread_continuation_entries,
3871 'sectionListContinuation': extract_entries, # for feeds
3872 }
8bdd16b4 3873 continuation_contents = try_get(
69184e41 3874 response, lambda x: x['continuationContents'], dict) or {}
3875 continuation_renderer = None
3876 for key, value in continuation_contents.items():
3877 if key not in known_continuation_renderers:
3462ffa8 3878 continue
69184e41 3879 continuation_renderer = value
3880 continuation_list = [None]
3881 for entry in known_continuation_renderers[key](continuation_renderer):
3882 yield entry
3883 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3884 break
3885 if continuation_renderer:
3886 continue
c5e8d7af 3887
a1b535bd 3888 known_renderers = {
3889 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3890 'gridVideoRenderer': (self._grid_entries, 'items'),
d61fc646 3891 'gridChannelRenderer': (self._grid_entries, 'items'),
a1b535bd 3892 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
cd7c66cf 3893 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
9ba5705a 3894 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
26fe8ffe 3895 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
a1b535bd 3896 }
cce889b9 3897 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
8bdd16b4 3898 continuation_items = try_get(
cce889b9 3899 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
a1b535bd 3900 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3901 video_items_renderer = None
3902 for key, value in continuation_item.items():
3903 if key not in known_renderers:
8bdd16b4 3904 continue
a1b535bd 3905 video_items_renderer = {known_renderers[key][1]: continuation_items}
9ba5705a 3906 continuation_list = [None]
a1b535bd 3907 for entry in known_renderers[key][0](video_items_renderer):
3908 yield entry
9ba5705a 3909 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
a1b535bd 3910 break
3911 if video_items_renderer:
3912 continue
8bdd16b4 3913 break
9558dcec 3914
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab that is flagged as selected.

    Each element of ``tabs`` is looked up under 'tabRenderer' or
    'expandableTabRenderer'; the first renderer whose 'selected' value is
    exactly True is returned.

    Raises ExtractorError when no tab is selected.
    """
    for tab_item in tabs:
        candidate = dict_get(tab_item, ('tabRenderer', 'expandableTabRenderer'))
        if candidate and candidate.get('selected') is True:
            return candidate
    raise ExtractorError('Unable to find selected tab')
b82f815f 3923
@classmethod
def _extract_uploader(cls, data):
    """Collect uploader details from the playlist sidebar.

    Reads the first title run of the video owner found in the
    'playlistSidebarSecondaryInfoRenderer'. The returned dict may hold
    'uploader', 'uploader_id' and 'uploader_url'; keys whose value is None
    are dropped, and an empty dict is returned when there is no owner.
    """
    secondary_info = cls._extract_sidebar_info_renderer(
        data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner_run = try_get(
        secondary_info,
        lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner_run:
        return {}

    found = {
        'uploader': owner_run.get('text'),
        'uploader_id': try_get(
            owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(
                owner_run,
                lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'],
                compat_str)),
    }
    return dict((key, value) for key, value in found.items() if value is not None)
8bdd16b4 3938
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a page that is laid out as tabs.

    Metadata is read from the 'channelMetadataRenderer' when present,
    otherwise from the 'playlistMetadataRenderer'. A missing playlist id
    falls back to ``item_id``; a missing title falls back to the hashtag
    header text or the playlist id. The selected tab's 'title' and
    'expandedText' are appended to the title. Entries are produced by
    ``self._entries`` for the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    meta_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta_renderer:
        channel_name = meta_renderer.get('title')
        channel_url = meta_renderer.get('channelUrl')
        channel_id = meta_renderer.get('externalId')
    else:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags, thumbnails_list = [], []
    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        # Only set for channel pages; playlists fall back to item_id below
        playlist_id = channel_id
        tags = meta_renderer.get('keywords', '').split()
        thumbnails_list = try_get(meta_renderer, lambda x: x['avatar']['thumbnails'], list)
        if not thumbnails_list:
            thumbnails_list = try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list) or []

    thumbnails = []
    for thumb in thumbnails_list:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        # No channel metadata: take whatever uploader info the sidebar offers
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly updated) uploader fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    ytcfg = self.extract_ytcfg(item_id, webpage)
    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 4013
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a Mix playlist, requesting further pages as needed.

    Each round lists the entries of ``playlist``, skips everything up to
    and including the last video already yielded, and yields the rest.
    Iteration ends when a page has no (new) videos, when the very first
    video shows up again, or when a follow-up response carries no playlist.

    @param playlist     playlist renderer dict of the current page
    @param playlist_id  id sent as 'playlistId' in follow-up requests
    @param data         initial page data (used for the account sync id)
    @param webpage      downloaded page (used for ytcfg and identity token)
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        # The lookup at the end of the loop yields None when the response
        # lacks the expected path; stop instead of iterating over None
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video that was already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item is not guaranteed to carry a watchEndpoint; fall back
        # to an empty dict so the defaults below apply instead of crashing
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 4049
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Return the result for a playlist shown next to a watch page.

    When the playlist's endpoint URL differs from ``url`` the extraction is
    delegated to that URL via YoutubeTabIE; otherwise the playlist is
    handled as a Mix through ``_extract_mix_playlist``.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)

    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)

    if not playlist_url or playlist_url == url:
        # Nothing to delegate to: treat it as a mix playlist
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4067
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    primary_info = self._extract_sidebar_info_renderer(
        data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(primary_info)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        primary_info,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for dropdown_entry in dropdown_entries:
        if not try_get(
                dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if not label:
            continue
        # The first selected entry that has a label decides; stop looking
        badge_labels.add(label.lower())
        break

    is_private = is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'public':
            is_unlisted = is_private = False
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4099
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy ``info_renderer`` entry of the playlist sidebar.

    The items under data['sidebar']['playlistSidebarRenderer']['items'] are
    scanned in order; a value stored under the key ``info_renderer`` is
    returned only if it passes ``expected_type``. Returns None when nothing
    matches.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    matches = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((found for found in matches if found), None)
4108
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when the page has no 'playlistSidebarPrimaryInfoRenderer'.
    Otherwise the API is queried non-fatally, using the button's browse
    endpoint when the button is found and default values when it is not.
    """
    primary_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not primary_info:
        return

    browse_id = params = None
    menu_items = try_get(
        primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        label = try_get(nav_item, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id, params = endpoint.get('browseId'), endpoint.get('params')
            break

    ytcfg = self.extract_ytcfg(item_id, webpage)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=visitor_data)
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4147
def _extract_webpage(self, url, item_id):
    """Download ``url`` and return ``(webpage, data)``.

    ``data`` is the initial data extracted from the page. The download is
    repeated, up to the 'extractor_retries' parameter (default 3), until
    the data contains 'contents' or 'currentVideoEndpoint'.

    Raises ExtractorError once the retries are exhausted.
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1
    # Shown both in the retry warning and in the final error
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self.extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
4169
@staticmethod
def _smuggle_data(entries, data):
    """Yield every entry, smuggling ``data`` into its 'url' when data is truthy.

    Entries are modified in place before being yielded.
    """
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
4176
def _real_extract(self, url):
    """Extract ``url`` and pass the smuggled data on to every resulting entry.

    Data smuggled into ``url`` is unpacked first, and 'is_music_url' is
    added to it for music URLs, before the actual extraction runs.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True

    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4185
# Splits a URL into 'pre' (the part matched by _VALID_URL), an optional
# 'tab' path segment (only tried when the 'channel_type' group matched)
# and 'post' (whatever remains).
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)

def __real_extract(self, url, smuggled_data):
    """Normalise the URL, download the page and dispatch on what it contains.

    The URL is rewritten first (host forced to www.youtube.com, music
    channels mapped to playlists, channel home pages redirected to
    '/videos' unless the 'no-youtube-channel-redirect' compat option is
    set). The downloaded data is then handled as a tab page, a watch-page
    playlist or a single video, in that order.

    Raises ExtractorError when none of these can be recognised.
    """
    playlist_url_tmpl = 'https://www.youtube.com/playlist?list=%s'

    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    redirect_allowed = 'no-youtube-channel-redirect' not in compat_opts

    def get_mobj(url):
        # Unmatched groups become '' so that string methods can be used freely
        groups = self._url_re.match(url).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post = mobj['pre'], mobj['tab'].lower(), mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre, tab, post, is_channel = playlist_url_tmpl % item_id, '', '', False
        elif id_prefix == 'MP':
            # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
            item_id = self._search_regex(
                r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                'playlist id')
            pre, tab, post, is_channel = playlist_url_tmpl % item_id, '', '', False
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = 'https://www.youtube.com/channel/%s' % item_id

    if is_channel and not tab and redirect_allowed:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = playlist_url_tmpl % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if redirect_allowed:
            requested_tab = mobj['tab']
            if requested_tab == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab == '/videos' and tab_name.lower() != requested_tab[1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (requested_tab[1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (requested_tab[1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 4302
c5e8d7af 4303
class YoutubePlaylistIE(InfoExtractor):
    """Matches playlist URLs and bare playlist ids and hands them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a video id ('v')."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return not has_video_id and super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the URL to the canonical /playlist form and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string; a bare id has none, so build one
        query = parse_qs(url) or {'list': playlist_id}
        target_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            target_url = smuggle_url(target_url, {'is_music_url': True})
        return self.url_result(target_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4388
4389
class YoutubeYtBeIE(InfoExtractor):
    """Turns youtu.be links that carry a playlist id into a watch URL for YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent youtube.com/watch URL and delegate to YoutubeTabIE."""
        match = re.match(self._VALID_URL, url)
        playlist_id = match.group('playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': match.group('id'),
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4428
4429
class YoutubeYtUserIE(InfoExtractor):
    """Maps the 'ytuser:<name>' shorthand to the user's page on youtube.com."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/ URL of the given name."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4443
b05654f0 4444
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the ':ytfav' shorthand to the 'LL' playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the 'LL' playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4462
4463
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor behind the "ytsearch" keyword."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` video results for ``query``, page by page.

        Paging stops when a response is empty, contains no result sections,
        or offers no continuation for the next page.
        """
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        yielded = 0
        continuation = {}
        for page_num in itertools.count(1):
            # Merge the continuation of the previous page into the request
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page and continuation pages keep the sections in different places
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                continuation = continuation or self._extract_continuation({'contents': [section]})

                section_items = try_get(
                    section, lambda x: x['itemSectionRenderer']['contents'], list)
                for content in section_items or []:
                    video = content.get('videoRenderer') if isinstance(content, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query, query)
75dff0ee 4531
c9ae7b95 4532
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE behind the "ytsearchdate" keyword."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' field of every search request
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4538
c9ae7b95 4539
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Runs a search taken from a youtube.com/results URL."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own _VALID_URL."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Search for the URL's 'search_query' (or 'q'), using its 'sp' value as search params."""
        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        # Stored on the instance because _entries reads it from there
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4566
4567
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    Every subclass has to provide the _FEED_NAME property.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/ URL of this feed."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4584
4585
class YoutubeWatchLaterIE(InfoExtractor):
    """Maps the ':ytwatchlater' shorthand to the 'WL' playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the 'WL' playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4598
4599
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the youtube.com front page and the ':ytrec' shorthand."""

    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _FEED_NAME = 'recommended'
    # Overrides the True inherited from the feeds base class
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4615
1ed5b5c9 4616
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ':ytsubs' shorthand."""

    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4628
1ed5b5c9 4629
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed, selected by keyword."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    # Accepts both :ythis and :ythistory
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
1ed5b5c9
JMF
4638
4639
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their ``v=`` parameter and explain why.

    The pattern matches ``watch?`` URLs ending after at most one of a few
    non-identifying parameters (``feature``, ``annotation_id``, ``x-yt-cl``,
    ``hl``, ``t``), and ``attribution_link?a=...`` URLs with a single
    parameter. Extraction always fails with an explanatory error.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        # The suggested command line names this program (yt-dlp), so the
        # user can copy the example as-is.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4687
4688
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is only 1 to 10 characters long.

    Such URLs are reported as truncated instead of being extracted.
    """

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        # Report the partial ID and the full URL so the user can see what
        # was actually received.
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)