# Source: yt-dlp — yt_dlp/extractor/youtube.py (commit: Add field `live_status`)
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bytes_to_intlist,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 error_to_compat_str,
36 ExtractorError,
37 float_or_none,
38 format_field,
39 int_or_none,
40 intlist_to_bytes,
41 mimetype2ext,
42 orderedSet,
43 parse_codecs,
44 parse_count,
45 parse_duration,
46 qualities,
47 remove_start,
48 smuggle_url,
49 str_or_none,
50 str_to_int,
51 traverse_obj,
52 try_get,
53 unescapeHTML,
54 unified_strdate,
55 unsmuggle_url,
56 update_url_query,
57 url_or_none,
58 urlencode_postdata,
59 urljoin,
60 variadic,
61 )
62
63
def parse_qs(url):
    """Return the query string of *url* parsed into a dict of value lists."""
    parsed = compat_urlparse.urlparse(url)
    return compat_urlparse.parse_qs(parsed.query)
66
67
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account login/challenge endpoints (legacy; form login is broken)
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # URL path components that can never be a channel/user name
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches playlist IDs (prefixed forms plus the special RDMM/WL/LL/LM lists)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
87
88 def _login(self):
89 """
90 Attempt to log in to YouTube.
91 True is returned if successful or skipped.
92 False is returned if login failed.
93
94 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
95 """
96
97 def warn(message):
98 self.report_warning(message)
99
100 # username+password login is broken
101 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
102 self.raise_login_required(
103 'Login details are needed to download this content', method='cookies')
104 username, password = self._get_login_info()
105 if username:
106 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
107 return
108
109 # Everything below this is broken!
110 r'''
111 # No authentication to be performed
112 if username is None:
113 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
114 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
115 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
116 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
117 return True
118
119 login_page = self._download_webpage(
120 self._LOGIN_URL, None,
121 note='Downloading login page',
122 errnote='unable to fetch login page', fatal=False)
123 if login_page is False:
124 return
125
126 login_form = self._hidden_inputs(login_page)
127
128 def req(url, f_req, note, errnote):
129 data = login_form.copy()
130 data.update({
131 'pstMsg': 1,
132 'checkConnection': 'youtube',
133 'checkedDomains': 'youtube',
134 'hl': 'en',
135 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
136 'f.req': json.dumps(f_req),
137 'flowName': 'GlifWebSignIn',
138 'flowEntry': 'ServiceLogin',
139 # TODO: reverse actual botguard identifier generation algo
140 'bgRequest': '["identifier",""]',
141 })
142 return self._download_json(
143 url, None, note=note, errnote=errnote,
144 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
145 fatal=False,
146 data=urlencode_postdata(data), headers={
147 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
148 'Google-Accounts-XSRF': 1,
149 })
150
151 lookup_req = [
152 username,
153 None, [], None, 'US', None, None, 2, False, True,
154 [
155 None, None,
156 [2, 1, None, 1,
157 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
158 None, [], 4],
159 1, [None, None, []], None, None, None, True
160 ],
161 username,
162 ]
163
164 lookup_results = req(
165 self._LOOKUP_URL, lookup_req,
166 'Looking up account info', 'Unable to look up account info')
167
168 if lookup_results is False:
169 return False
170
171 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
172 if not user_hash:
173 warn('Unable to extract user hash')
174 return False
175
176 challenge_req = [
177 user_hash,
178 None, 1, None, [1, None, None, None, [password, None, True]],
179 [
180 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
181 1, [None, None, []], None, None, None, True
182 ]]
183
184 challenge_results = req(
185 self._CHALLENGE_URL, challenge_req,
186 'Logging in', 'Unable to log in')
187
188 if challenge_results is False:
189 return
190
191 login_res = try_get(challenge_results, lambda x: x[0][5], list)
192 if login_res:
193 login_msg = try_get(login_res, lambda x: x[5], compat_str)
194 warn(
195 'Unable to login: %s' % 'Invalid password'
196 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
197 return False
198
199 res = try_get(challenge_results, lambda x: x[0][-1], list)
200 if not res:
201 warn('Unable to extract result entry')
202 return False
203
204 login_challenge = try_get(res, lambda x: x[0][0], list)
205 if login_challenge:
206 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
207 if challenge_str == 'TWO_STEP_VERIFICATION':
208 # SEND_SUCCESS - TFA code has been successfully sent to phone
209 # QUOTA_EXCEEDED - reached the limit of TFA codes
210 status = try_get(login_challenge, lambda x: x[5], compat_str)
211 if status == 'QUOTA_EXCEEDED':
212 warn('Exceeded the limit of TFA codes, try later')
213 return False
214
215 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
216 if not tl:
217 warn('Unable to extract TL')
218 return False
219
220 tfa_code = self._get_tfa_info('2-step verification code')
221
222 if not tfa_code:
223 warn(
224 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
225 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
226 return False
227
228 tfa_code = remove_start(tfa_code, 'G-')
229
230 tfa_req = [
231 user_hash, None, 2, None,
232 [
233 9, None, None, None, None, None, None, None,
234 [None, tfa_code, True, 2]
235 ]]
236
237 tfa_results = req(
238 self._TFA_URL.format(tl), tfa_req,
239 'Submitting TFA code', 'Unable to submit TFA code')
240
241 if tfa_results is False:
242 return False
243
244 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
245 if tfa_res:
246 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
247 warn(
248 'Unable to finish TFA: %s' % 'Invalid TFA code'
249 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
250 return False
251
252 check_cookie_url = try_get(
253 tfa_results, lambda x: x[0][-1][2], compat_str)
254 else:
255 CHALLENGES = {
256 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
257 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
258 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
259 }
260 challenge = CHALLENGES.get(
261 challenge_str,
262 '%s returned error %s.' % (self.IE_NAME, challenge_str))
263 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
264 return False
265 else:
266 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
267
268 if not check_cookie_url:
269 warn('Unable to extract CheckCookie URL')
270 return False
271
272 check_cookie_results = self._download_webpage(
273 check_cookie_url, None, 'Checking cookie', fatal=False)
274
275 if check_cookie_results is False:
276 return False
277
278 if 'https://myaccount.google.com/' not in check_cookie_results:
279 warn('Unable to log in')
280 return False
281
282 return True
283 '''
284
285 def _initialize_consent(self):
286 cookies = self._get_cookies('https://www.youtube.com/')
287 if cookies.get('__Secure-3PSID'):
288 return
289 consent_id = None
290 consent = cookies.get('CONSENT')
291 if consent:
292 if 'YES' in consent.value:
293 return
294 consent_id = self._search_regex(
295 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
296 if not consent_id:
297 consent_id = random.randint(100, 999)
298 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
299
300 def _real_initialize(self):
301 self._initialize_consent()
302 if self._downloader is None:
303 return
304 if not self._login():
305 return
306
    # Matches the JSON blob assigned to ytInitialData in watch/browse pages
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Matches the JSON blob assigned to ytInitialPlayerResponse
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Token that reliably follows the above blobs; used to anchor the non-greedy match
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
310
    # Built-in fallback ytcfg values, keyed by innertube client name. Used when
    # a client's configuration cannot be scraped from the page.
    # NOTE(review): the API keys and client versions here go stale over time;
    # a page-extracted ytcfg takes precedence when available.
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 3
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 55
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 21
        },
        'IOS': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'IOS',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'IOS',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 5

        },
        'IOS_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'IOS_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'IOS_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 26
        },
        'IOS_MESSAGES_EXTENSION': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'IOS_MESSAGES_EXTENSION',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'IOS_MESSAGES_EXTENSION',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 66
        }
    }
440
    # Hostname used for innertube API requests, per client; clients not listed
    # here fall back to the WEB host (see _get_innertube_host)
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }
447
    # Maps user-facing client names to innertube client names.
    # Clients starting with _ cannot be explicitly requested by the user.
    _YT_CLIENTS = {
        'web': 'WEB',
        'web_music': 'WEB_REMIX',
        '_web_embedded': 'WEB_EMBEDDED_PLAYER',
        '_web_agegate': 'TVHTML5',
        'android': 'ANDROID',
        'android_music': 'ANDROID_MUSIC',
        '_android_embedded': 'ANDROID_EMBEDDED_PLAYER',
        '_android_agegate': 'ANDROID',
        'ios': 'IOS',
        'ios_music': 'IOS_MUSIC',
        '_ios_embedded': 'IOS_MESSAGES_EXTENSION',
        '_ios_agegate': 'IOS'
    }
463
464 def _get_default_ytcfg(self, client='WEB'):
465 if client in self._YT_DEFAULT_YTCFGS:
466 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
467 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
468 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
469
470 def _get_innertube_host(self, client='WEB'):
471 return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
472
473 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
474 # try_get but with fallback to default ytcfg client values when present
475 _func = lambda y: try_get(y, getter, expected_type)
476 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
477
478 def _extract_client_name(self, ytcfg, default_client='WEB'):
479 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
480
481 @staticmethod
482 def _extract_session_index(*data):
483 for ytcfg in data:
484 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
485 if session_index is not None:
486 return session_index
487
488 def _extract_client_version(self, ytcfg, default_client='WEB'):
489 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
490
491 def _extract_api_key(self, ytcfg=None, default_client='WEB'):
492 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
493
494 def _extract_context(self, ytcfg=None, default_client='WEB'):
495 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
496 context = _get_context(ytcfg)
497 if context:
498 return context
499
500 context = _get_context(self._get_default_ytcfg(default_client))
501 if not ytcfg:
502 return context
503
504 # Recreate the client context (required)
505 context['client'].update({
506 'clientVersion': self._extract_client_version(ytcfg, default_client),
507 'clientName': self._extract_client_name(ytcfg, default_client),
508 })
509 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
510 if visitor_data:
511 context['client']['visitorData'] = visitor_data
512 return context
513
514 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
515 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
516 # See: https://github.com/yt-dlp/yt-dlp/issues/393
517 yt_cookies = self._get_cookies('https://www.youtube.com')
518 sapisid_cookie = dict_get(
519 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
520 if sapisid_cookie is None:
521 return
522 time_now = round(time.time())
523 # SAPISID cookie is required if not already present
524 if not yt_cookies.get('SAPISID'):
525 self._set_cookie(
526 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
527 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
528 sapisidhash = hashlib.sha1(
529 f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
530 return f'SAPISIDHASH {time_now}_{sapisidhash}'
531
532 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
533 note='Downloading API JSON', errnote='Unable to download API page',
534 context=None, api_key=None, api_hostname=None, default_client='WEB'):
535
536 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
537 data.update(query)
538 real_headers = self.generate_api_headers(default_client=default_client)
539 real_headers.update({'content-type': 'application/json'})
540 if headers:
541 real_headers.update(headers)
542 return self._download_json(
543 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
544 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
545 data=json.dumps(data).encode('utf8'), headers=real_headers,
546 query={'key': api_key or self._extract_api_key()})
547
548 def extract_yt_initial_data(self, video_id, webpage):
549 return self._parse_json(
550 self._search_regex(
551 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
552 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
553 video_id)
554
555 def _extract_identity_token(self, webpage, item_id):
556 if not webpage:
557 return None
558 ytcfg = self.extract_ytcfg(item_id, webpage)
559 if ytcfg:
560 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
561 if token:
562 return token
563 return self._search_regex(
564 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
565 'identity token', default=None)
566
567 @staticmethod
568 def _extract_account_syncid(*args):
569 """
570 Extract syncId required to download private playlists of secondary channels
571 @params response and/or ytcfg
572 """
573 for data in args:
574 # ytcfg includes channel_syncid if on secondary channel
575 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
576 if delegated_sid:
577 return delegated_sid
578 sync_ids = (try_get(
579 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
580 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
581 if len(sync_ids) >= 2 and sync_ids[1]:
582 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
583 # and just "user_syncid||" for primary channel. We only want the channel_syncid
584 return sync_ids[0]
585
586 def extract_ytcfg(self, video_id, webpage):
587 if not webpage:
588 return {}
589 return self._parse_json(
590 self._search_regex(
591 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
592 default='{}'), video_id, fatal=False) or {}
593
594 def generate_api_headers(
595 self, ytcfg=None, identity_token=None, account_syncid=None,
596 visitor_data=None, api_hostname=None, default_client='WEB', session_index=None):
597 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
598 headers = {
599 'X-YouTube-Client-Name': compat_str(
600 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
601 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
602 'Origin': origin
603 }
604 if not visitor_data and ytcfg:
605 visitor_data = try_get(
606 self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
607 if identity_token:
608 headers['X-Youtube-Identity-Token'] = identity_token
609 if account_syncid:
610 headers['X-Goog-PageId'] = account_syncid
611 if session_index is None and ytcfg:
612 session_index = self._extract_session_index(ytcfg)
613 if account_syncid or session_index is not None:
614 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
615 if visitor_data:
616 headers['X-Goog-Visitor-Id'] = visitor_data
617 auth = self._generate_sapisidhash_header(origin)
618 if auth is not None:
619 headers['Authorization'] = auth
620 headers['X-Origin'] = origin
621 return headers
622
623 @staticmethod
624 def _build_api_continuation_query(continuation, ctp=None):
625 query = {
626 'continuation': continuation
627 }
628 # TODO: Inconsistency with clickTrackingParams.
629 # Currently we have a fixed ctp contained within context (from ytcfg)
630 # and a ctp in root query for continuation.
631 if ctp:
632 query['clickTracking'] = {'clickTrackingParams': ctp}
633 return query
634
635 @classmethod
636 def _extract_next_continuation_data(cls, renderer):
637 next_continuation = try_get(
638 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
639 lambda x: x['continuation']['reloadContinuationData']), dict)
640 if not next_continuation:
641 return
642 continuation = next_continuation.get('continuation')
643 if not continuation:
644 return
645 ctp = next_continuation.get('clickTrackingParams')
646 return cls._build_api_continuation_query(continuation, ctp)
647
648 @classmethod
649 def _extract_continuation_ep_data(cls, continuation_ep: dict):
650 if isinstance(continuation_ep, dict):
651 continuation = try_get(
652 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
653 if not continuation:
654 return
655 ctp = continuation_ep.get('clickTrackingParams')
656 return cls._build_api_continuation_query(continuation, ctp)
657
658 @classmethod
659 def _extract_continuation(cls, renderer):
660 next_continuation = cls._extract_next_continuation_data(renderer)
661 if next_continuation:
662 return next_continuation
663
664 contents = []
665 for key in ('contents', 'items'):
666 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
667
668 for content in contents:
669 if not isinstance(content, dict):
670 continue
671 continuation_ep = try_get(
672 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
673 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
674 dict)
675 continuation = cls._extract_continuation_ep_data(continuation_ep)
676 if continuation:
677 return continuation
678
679 @classmethod
680 def _extract_alerts(cls, data):
681 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
682 if not isinstance(alert_dict, dict):
683 continue
684 for alert in alert_dict.values():
685 alert_type = alert.get('type')
686 if not alert_type:
687 continue
688 message = cls._get_text(alert.get('text'))
689 if message:
690 yield alert_type, message
691
692 def _report_alerts(self, alerts, expected=True):
693 errors = []
694 warnings = []
695 for alert_type, alert_message in alerts:
696 if alert_type.lower() == 'error':
697 errors.append([alert_type, alert_message])
698 else:
699 warnings.append([alert_type, alert_message])
700
701 for alert_type, alert_message in (warnings + errors[:-1]):
702 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
703 if errors:
704 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
705
706 def _extract_and_report_alerts(self, data, *args, **kwargs):
707 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
708
709 def _extract_badges(self, renderer: dict):
710 badges = set()
711 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
712 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
713 if label:
714 badges.add(label.lower())
715 return badges
716
    @staticmethod
    def _get_text(data, getter=None, max_runs=None):
        """Extract plain text from a YouTube "text object" ('simpleText' or 'runs').

        @param data     the renderer (or bare list of runs) to read text from
        @param getter   optional callable(s) applied to *data* first (try_get style);
                        multiple getters are tried in order until one yields text
        @param max_runs if given (and non-zero), only the first *max_runs* runs
                        are joined; 0 behaves like None (all runs)
        """
        for get in variadic(getter):
            # getter=None means "use data as-is" (variadic(None) yields one None)
            d = try_get(data, get) if get is not None else data
            # Simple case: {'simpleText': '...'}
            text = try_get(d, lambda x: x['simpleText'], compat_str)
            if text:
                return text
            runs = try_get(d, lambda x: x['runs'], list) or []
            # A bare list is treated as the runs themselves
            if not runs and isinstance(d, list):
                runs = d

            def get_runs(runs):
                for run in runs[:min(len(runs), max_runs or len(runs))]:
                    yield try_get(run, lambda x: x['text'], compat_str) or ''

            text = ''.join(get_runs(runs))
            if text:
                return text
735
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """Call the innertube API with retries.

        Retries on intermittent HTTP 500/503/404 errors and on responses that
        lack all of *check_get_keys* (YouTube sometimes sends incomplete data).
        Returns the parsed JSON response, or None when non-fatal and all
        attempts failed.
        """
        response = None
        last_error = None
        count = -1  # incremented before first attempt, so attempt 0 is not a retry
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                # fatal=True here so HTTP errors surface as ExtractorError and
                # can be classified below; outer `fatal` decides final behaviour
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
791
792 @staticmethod
793 def is_music_url(url):
794 return re.match(r'https?://music\.youtube\.com/', url) is not None
795
796 def _extract_video(self, renderer):
797 video_id = renderer.get('videoId')
798 title = self._get_text(renderer.get('title'))
799 description = self._get_text(renderer.get('descriptionSnippet'))
800 duration = parse_duration(self._get_text(renderer.get('lengthText')))
801 view_count_text = self._get_text(renderer.get('viewCountText')) or ''
802 view_count = str_to_int(self._search_regex(
803 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
804 'view count', default=None))
805
806 uploader = self._get_text(renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText']))
807
808 return {
809 '_type': 'url',
810 'ie_key': YoutubeIE.ie_key(),
811 'id': video_id,
812 'url': video_id,
813 'title': title,
814 'description': description,
815 'duration': duration,
816 'view_count': view_count,
817 'uploader': uploader,
818 }
819
820
821 class YoutubeIE(YoutubeBaseInfoExtractor):
822 IE_DESC = 'YouTube.com'
823 _INVIDIOUS_SITES = (
824 # invidious-redirect websites
825 r'(?:www\.)?redirect\.invidious\.io',
826 r'(?:(?:www|dev)\.)?invidio\.us',
827 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
828 r'(?:www\.)?invidious\.pussthecat\.org',
829 r'(?:www\.)?invidious\.zee\.li',
830 r'(?:www\.)?invidious\.ethibox\.fr',
831 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
832 # youtube-dl invidious instances list
833 r'(?:(?:www|no)\.)?invidiou\.sh',
834 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
835 r'(?:www\.)?invidious\.kabi\.tk',
836 r'(?:www\.)?invidious\.mastodon\.host',
837 r'(?:www\.)?invidious\.zapashcanon\.fr',
838 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
839 r'(?:www\.)?invidious\.tinfoil-hat\.net',
840 r'(?:www\.)?invidious\.himiko\.cloud',
841 r'(?:www\.)?invidious\.reallyancient\.tech',
842 r'(?:www\.)?invidious\.tube',
843 r'(?:www\.)?invidiou\.site',
844 r'(?:www\.)?invidious\.site',
845 r'(?:www\.)?invidious\.xyz',
846 r'(?:www\.)?invidious\.nixnet\.xyz',
847 r'(?:www\.)?invidious\.048596\.xyz',
848 r'(?:www\.)?invidious\.drycat\.fr',
849 r'(?:www\.)?inv\.skyn3t\.in',
850 r'(?:www\.)?tube\.poal\.co',
851 r'(?:www\.)?tube\.connect\.cafe',
852 r'(?:www\.)?vid\.wxzm\.sx',
853 r'(?:www\.)?vid\.mint\.lgbt',
854 r'(?:www\.)?vid\.puffyan\.us',
855 r'(?:www\.)?yewtu\.be',
856 r'(?:www\.)?yt\.elukerio\.org',
857 r'(?:www\.)?yt\.lelux\.fi',
858 r'(?:www\.)?invidious\.ggc-project\.de',
859 r'(?:www\.)?yt\.maisputain\.ovh',
860 r'(?:www\.)?ytprivate\.com',
861 r'(?:www\.)?invidious\.13ad\.de',
862 r'(?:www\.)?invidious\.toot\.koeln',
863 r'(?:www\.)?invidious\.fdn\.fr',
864 r'(?:www\.)?watch\.nettohikari\.com',
865 r'(?:www\.)?invidious\.namazso\.eu',
866 r'(?:www\.)?invidious\.silkky\.cloud',
867 r'(?:www\.)?invidious\.exonip\.de',
868 r'(?:www\.)?invidious\.riverside\.rocks',
869 r'(?:www\.)?invidious\.blamefran\.net',
870 r'(?:www\.)?invidious\.moomoo\.de',
871 r'(?:www\.)?ytb\.trom\.tf',
872 r'(?:www\.)?yt\.cyberhost\.uk',
873 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
874 r'(?:www\.)?qklhadlycap4cnod\.onion',
875 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
876 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
877 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
878 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
879 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
880 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
881 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
882 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
883 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
884 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
885 )
    # Matches canonical youtube.com watch/embed/e URLs, youtu.be short links,
    # several proxy/mirror front-ends, the Invidious instances interpolated
    # below from _INVIDIOUS_SITES, or a bare 11-character video ID.  Verbose
    # (?x) mode: whitespace and trailing # comments inside the pattern are
    # not part of the regex.
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                            (?:www\.)?deturl\.com/www\.youtube\.com|
                            (?:www\.)?pwnyoutube\.com|
                            (?:www\.)?hooktube\.com|
                            (?:www\.)?yourepeat\.com|
                            tube\.majestyc\.net|
                            %(invidious)s|
                            youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                                 v=
                             )
                         ))
                         |(?:
                            youtu\.be|                                        # just youtu.be/xxxx
                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                            %(invidious)s
                         )/
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     (?:\#|$)""" % {
        'invidious': '|'.join(_INVIDIOUS_SITES),
    }
    # Patterns matching player JS URLs; each captures the player build as the
    # named group `id` (tried in order until one matches).
    _PLAYER_INFO_RE = (
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
    )
    # Static metadata for known YouTube format codes ("itags"): container
    # extension, resolution, codec and bitrate info, keyed by itag string.
    # Negative `preference` values demote 3D/HLS variants below the defaults.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    }
    # Subtitle format identifiers supported by this extractor.
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Error/reason messages that indicate an age-gated video.
    _AGE_GATE_REASONS = (
        'Sign in to confirm your age',
        'This video may be inappropriate for some users.',
        'Sorry, this content is age-restricted.')

    # NOTE(review): presumably opts out of the base class's generic
    # geo-bypass (X-Forwarded-For faking) — confirm against InfoExtractor.
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
1041 _TESTS = [
1042 {
1043 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1044 'info_dict': {
1045 'id': 'BaW_jenozKc',
1046 'ext': 'mp4',
1047 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1048 'uploader': 'Philipp Hagemeister',
1049 'uploader_id': 'phihag',
1050 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1051 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1052 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1053 'upload_date': '20121002',
1054 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1055 'categories': ['Science & Technology'],
1056 'tags': ['youtube-dl'],
1057 'duration': 10,
1058 'view_count': int,
1059 'like_count': int,
1060 'dislike_count': int,
1061 'start_time': 1,
1062 'end_time': 9,
1063 }
1064 },
1065 {
1066 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1067 'note': 'Embed-only video (#1746)',
1068 'info_dict': {
1069 'id': 'yZIXLfi8CZQ',
1070 'ext': 'mp4',
1071 'upload_date': '20120608',
1072 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1073 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1074 'uploader': 'SET India',
1075 'uploader_id': 'setindia',
1076 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1077 'age_limit': 18,
1078 },
1079 'skip': 'Private video',
1080 },
1081 {
1082 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1083 'note': 'Use the first video ID in the URL',
1084 'info_dict': {
1085 'id': 'BaW_jenozKc',
1086 'ext': 'mp4',
1087 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1088 'uploader': 'Philipp Hagemeister',
1089 'uploader_id': 'phihag',
1090 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1091 'upload_date': '20121002',
1092 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1093 'categories': ['Science & Technology'],
1094 'tags': ['youtube-dl'],
1095 'duration': 10,
1096 'view_count': int,
1097 'like_count': int,
1098 'dislike_count': int,
1099 },
1100 'params': {
1101 'skip_download': True,
1102 },
1103 },
1104 {
1105 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1106 'note': '256k DASH audio (format 141) via DASH manifest',
1107 'info_dict': {
1108 'id': 'a9LDPn-MO4I',
1109 'ext': 'm4a',
1110 'upload_date': '20121002',
1111 'uploader_id': '8KVIDEO',
1112 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1113 'description': '',
1114 'uploader': '8KVIDEO',
1115 'title': 'UHDTV TEST 8K VIDEO.mp4'
1116 },
1117 'params': {
1118 'youtube_include_dash_manifest': True,
1119 'format': '141',
1120 },
1121 'skip': 'format 141 not served anymore',
1122 },
1123 # DASH manifest with encrypted signature
1124 {
1125 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1126 'info_dict': {
1127 'id': 'IB3lcPjvWLA',
1128 'ext': 'm4a',
1129 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1130 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1131 'duration': 244,
1132 'uploader': 'AfrojackVEVO',
1133 'uploader_id': 'AfrojackVEVO',
1134 'upload_date': '20131011',
1135 'abr': 129.495,
1136 },
1137 'params': {
1138 'youtube_include_dash_manifest': True,
1139 'format': '141/bestaudio[ext=m4a]',
1140 },
1141 },
1142 # Normal age-gate video (embed allowed)
1143 {
1144 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1145 'info_dict': {
1146 'id': 'HtVdAasjOgU',
1147 'ext': 'mp4',
1148 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1149 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1150 'duration': 142,
1151 'uploader': 'The Witcher',
1152 'uploader_id': 'WitcherGame',
1153 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1154 'upload_date': '20140605',
1155 'age_limit': 18,
1156 },
1157 },
1158 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1159 # YouTube Red ad is not captured for creator
1160 {
1161 'url': '__2ABJjxzNo',
1162 'info_dict': {
1163 'id': '__2ABJjxzNo',
1164 'ext': 'mp4',
1165 'duration': 266,
1166 'upload_date': '20100430',
1167 'uploader_id': 'deadmau5',
1168 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1169 'creator': 'deadmau5',
1170 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1171 'uploader': 'deadmau5',
1172 'title': 'Deadmau5 - Some Chords (HD)',
1173 'alt_title': 'Some Chords',
1174 },
1175 'expected_warnings': [
1176 'DASH manifest missing',
1177 ]
1178 },
1179 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1180 {
1181 'url': 'lqQg6PlCWgI',
1182 'info_dict': {
1183 'id': 'lqQg6PlCWgI',
1184 'ext': 'mp4',
1185 'duration': 6085,
1186 'upload_date': '20150827',
1187 'uploader_id': 'olympic',
1188 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1189 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1190 'uploader': 'Olympics',
1191 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1192 },
1193 'params': {
1194 'skip_download': 'requires avconv',
1195 }
1196 },
1197 # Non-square pixels
1198 {
1199 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1200 'info_dict': {
1201 'id': '_b-2C3KPAM0',
1202 'ext': 'mp4',
1203 'stretched_ratio': 16 / 9.,
1204 'duration': 85,
1205 'upload_date': '20110310',
1206 'uploader_id': 'AllenMeow',
1207 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1208 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1209 'uploader': '孫ᄋᄅ',
1210 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1211 },
1212 },
1213 # url_encoded_fmt_stream_map is empty string
1214 {
1215 'url': 'qEJwOuvDf7I',
1216 'info_dict': {
1217 'id': 'qEJwOuvDf7I',
1218 'ext': 'webm',
1219 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1220 'description': '',
1221 'upload_date': '20150404',
1222 'uploader_id': 'spbelect',
1223 'uploader': 'Наблюдатели Петербурга',
1224 },
1225 'params': {
1226 'skip_download': 'requires avconv',
1227 },
1228 'skip': 'This live event has ended.',
1229 },
1230 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1231 {
1232 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1233 'info_dict': {
1234 'id': 'FIl7x6_3R5Y',
1235 'ext': 'webm',
1236 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1237 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1238 'duration': 220,
1239 'upload_date': '20150625',
1240 'uploader_id': 'dorappi2000',
1241 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1242 'uploader': 'dorappi2000',
1243 'formats': 'mincount:31',
1244 },
1245 'skip': 'not actual anymore',
1246 },
1247 # DASH manifest with segment_list
1248 {
1249 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1250 'md5': '8ce563a1d667b599d21064e982ab9e31',
1251 'info_dict': {
1252 'id': 'CsmdDsKjzN8',
1253 'ext': 'mp4',
1254 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1255 'uploader': 'Airtek',
1256 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1257 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1258 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1259 },
1260 'params': {
1261 'youtube_include_dash_manifest': True,
1262 'format': '135', # bestvideo
1263 },
1264 'skip': 'This live event has ended.',
1265 },
1266 {
1267 # Multifeed videos (multiple cameras), URL is for Main Camera
1268 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1269 'info_dict': {
1270 'id': 'jvGDaLqkpTg',
1271 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1272 'description': 'md5:e03b909557865076822aa169218d6a5d',
1273 },
1274 'playlist': [{
1275 'info_dict': {
1276 'id': 'jvGDaLqkpTg',
1277 'ext': 'mp4',
1278 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1279 'description': 'md5:e03b909557865076822aa169218d6a5d',
1280 'duration': 10643,
1281 'upload_date': '20161111',
1282 'uploader': 'Team PGP',
1283 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1284 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1285 },
1286 }, {
1287 'info_dict': {
1288 'id': '3AKt1R1aDnw',
1289 'ext': 'mp4',
1290 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1291 'description': 'md5:e03b909557865076822aa169218d6a5d',
1292 'duration': 10991,
1293 'upload_date': '20161111',
1294 'uploader': 'Team PGP',
1295 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1296 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1297 },
1298 }, {
1299 'info_dict': {
1300 'id': 'RtAMM00gpVc',
1301 'ext': 'mp4',
1302 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1303 'description': 'md5:e03b909557865076822aa169218d6a5d',
1304 'duration': 10995,
1305 'upload_date': '20161111',
1306 'uploader': 'Team PGP',
1307 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1308 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1309 },
1310 }, {
1311 'info_dict': {
1312 'id': '6N2fdlP3C5U',
1313 'ext': 'mp4',
1314 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1315 'description': 'md5:e03b909557865076822aa169218d6a5d',
1316 'duration': 10990,
1317 'upload_date': '20161111',
1318 'uploader': 'Team PGP',
1319 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1320 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1321 },
1322 }],
1323 'params': {
1324 'skip_download': True,
1325 },
1326 },
1327 {
1328 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1329 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1330 'info_dict': {
1331 'id': 'gVfLd0zydlo',
1332 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1333 },
1334 'playlist_count': 2,
1335 'skip': 'Not multifeed anymore',
1336 },
1337 {
1338 'url': 'https://vid.plus/FlRa-iH7PGw',
1339 'only_matching': True,
1340 },
1341 {
1342 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1343 'only_matching': True,
1344 },
1345 {
1346 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1347 # Also tests cut-off URL expansion in video description (see
1348 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1349 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1350 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1351 'info_dict': {
1352 'id': 'lsguqyKfVQg',
1353 'ext': 'mp4',
1354 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1355 'alt_title': 'Dark Walk',
1356 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1357 'duration': 133,
1358 'upload_date': '20151119',
1359 'uploader_id': 'IronSoulElf',
1360 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1361 'uploader': 'IronSoulElf',
1362 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1363 'track': 'Dark Walk',
1364 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1365 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1366 },
1367 'params': {
1368 'skip_download': True,
1369 },
1370 },
1371 {
1372 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1373 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1374 'only_matching': True,
1375 },
1376 {
1377 # Video with yt:stretch=17:0
1378 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1379 'info_dict': {
1380 'id': 'Q39EVAstoRM',
1381 'ext': 'mp4',
1382 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1383 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1384 'upload_date': '20151107',
1385 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1386 'uploader': 'CH GAMER DROID',
1387 },
1388 'params': {
1389 'skip_download': True,
1390 },
1391 'skip': 'This video does not exist.',
1392 },
1393 {
1394 # Video with incomplete 'yt:stretch=16:'
1395 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1396 'only_matching': True,
1397 },
1398 {
1399 # Video licensed under Creative Commons
1400 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1401 'info_dict': {
1402 'id': 'M4gD1WSo5mA',
1403 'ext': 'mp4',
1404 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1405 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1406 'duration': 721,
1407 'upload_date': '20150127',
1408 'uploader_id': 'BerkmanCenter',
1409 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1410 'uploader': 'The Berkman Klein Center for Internet & Society',
1411 'license': 'Creative Commons Attribution license (reuse allowed)',
1412 },
1413 'params': {
1414 'skip_download': True,
1415 },
1416 },
1417 {
1418 # Channel-like uploader_url
1419 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1420 'info_dict': {
1421 'id': 'eQcmzGIKrzg',
1422 'ext': 'mp4',
1423 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1424 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1425 'duration': 4060,
1426 'upload_date': '20151119',
1427 'uploader': 'Bernie Sanders',
1428 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1429 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1430 'license': 'Creative Commons Attribution license (reuse allowed)',
1431 },
1432 'params': {
1433 'skip_download': True,
1434 },
1435 },
1436 {
1437 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1438 'only_matching': True,
1439 },
1440 {
1441 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1442 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1443 'only_matching': True,
1444 },
1445 {
1446 # Rental video preview
1447 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1448 'info_dict': {
1449 'id': 'uGpuVWrhIzE',
1450 'ext': 'mp4',
1451 'title': 'Piku - Trailer',
1452 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1453 'upload_date': '20150811',
1454 'uploader': 'FlixMatrix',
1455 'uploader_id': 'FlixMatrixKaravan',
1456 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1457 'license': 'Standard YouTube License',
1458 },
1459 'params': {
1460 'skip_download': True,
1461 },
1462 'skip': 'This video is not available.',
1463 },
1464 {
1465 # YouTube Red video with episode data
1466 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1467 'info_dict': {
1468 'id': 'iqKdEhx-dD4',
1469 'ext': 'mp4',
1470 'title': 'Isolation - Mind Field (Ep 1)',
1471 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1472 'duration': 2085,
1473 'upload_date': '20170118',
1474 'uploader': 'Vsauce',
1475 'uploader_id': 'Vsauce',
1476 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1477 'series': 'Mind Field',
1478 'season_number': 1,
1479 'episode_number': 1,
1480 },
1481 'params': {
1482 'skip_download': True,
1483 },
1484 'expected_warnings': [
1485 'Skipping DASH manifest',
1486 ],
1487 },
1488 {
1489 # The following content has been identified by the YouTube community
1490 # as inappropriate or offensive to some audiences.
1491 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1492 'info_dict': {
1493 'id': '6SJNVb0GnPI',
1494 'ext': 'mp4',
1495 'title': 'Race Differences in Intelligence',
1496 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1497 'duration': 965,
1498 'upload_date': '20140124',
1499 'uploader': 'New Century Foundation',
1500 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1501 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1502 },
1503 'params': {
1504 'skip_download': True,
1505 },
1506 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1507 },
1508 {
1509 # itag 212
1510 'url': '1t24XAntNCY',
1511 'only_matching': True,
1512 },
1513 {
1514 # geo restricted to JP
1515 'url': 'sJL6WA-aGkQ',
1516 'only_matching': True,
1517 },
1518 {
1519 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1520 'only_matching': True,
1521 },
1522 {
1523 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1524 'only_matching': True,
1525 },
1526 {
1527 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1528 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1529 'only_matching': True,
1530 },
1531 {
1532 # DRM protected
1533 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1534 'only_matching': True,
1535 },
1536 {
1537 # Video with unsupported adaptive stream type formats
1538 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1539 'info_dict': {
1540 'id': 'Z4Vy8R84T1U',
1541 'ext': 'mp4',
1542 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1543 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1544 'duration': 433,
1545 'upload_date': '20130923',
1546 'uploader': 'Amelia Putri Harwita',
1547 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1548 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1549 'formats': 'maxcount:10',
1550 },
1551 'params': {
1552 'skip_download': True,
1553 'youtube_include_dash_manifest': False,
1554 },
1555 'skip': 'not actual anymore',
1556 },
1557 {
1558 # Youtube Music Auto-generated description
1559 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1560 'info_dict': {
1561 'id': 'MgNrAu2pzNs',
1562 'ext': 'mp4',
1563 'title': 'Voyeur Girl',
1564 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1565 'upload_date': '20190312',
1566 'uploader': 'Stephen - Topic',
1567 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1568 'artist': 'Stephen',
1569 'track': 'Voyeur Girl',
1570 'album': 'it\'s too much love to know my dear',
1571 'release_date': '20190313',
1572 'release_year': 2019,
1573 },
1574 'params': {
1575 'skip_download': True,
1576 },
1577 },
1578 {
1579 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1580 'only_matching': True,
1581 },
1582 {
1583 # invalid -> valid video id redirection
1584 'url': 'DJztXj2GPfl',
1585 'info_dict': {
1586 'id': 'DJztXj2GPfk',
1587 'ext': 'mp4',
1588 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1589 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1590 'upload_date': '20090125',
1591 'uploader': 'Prochorowka',
1592 'uploader_id': 'Prochorowka',
1593 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1594 'artist': 'Panjabi MC',
1595 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1596 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1597 },
1598 'params': {
1599 'skip_download': True,
1600 },
1601 'skip': 'Video unavailable',
1602 },
1603 {
1604 # empty description results in an empty string
1605 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1606 'info_dict': {
1607 'id': 'x41yOUIvK2k',
1608 'ext': 'mp4',
1609 'title': 'IMG 3456',
1610 'description': '',
1611 'upload_date': '20170613',
1612 'uploader_id': 'ElevageOrVert',
1613 'uploader': 'ElevageOrVert',
1614 },
1615 'params': {
1616 'skip_download': True,
1617 },
1618 },
1619 {
1620 # with '};' inside yt initial data (see [1])
1621 # see [2] for an example with '};' inside ytInitialPlayerResponse
1622 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1623 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1624 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1625 'info_dict': {
1626 'id': 'CHqg6qOn4no',
1627 'ext': 'mp4',
1628 'title': 'Part 77 Sort a list of simple types in c#',
1629 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1630 'upload_date': '20130831',
1631 'uploader_id': 'kudvenkat',
1632 'uploader': 'kudvenkat',
1633 },
1634 'params': {
1635 'skip_download': True,
1636 },
1637 },
1638 {
1639 # another example of '};' in ytInitialData
1640 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1641 'only_matching': True,
1642 },
1643 {
1644 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1645 'only_matching': True,
1646 },
1647 {
1648 # https://github.com/ytdl-org/youtube-dl/pull/28094
1649 'url': 'OtqTfy26tG0',
1650 'info_dict': {
1651 'id': 'OtqTfy26tG0',
1652 'ext': 'mp4',
1653 'title': 'Burn Out',
1654 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1655 'upload_date': '20141120',
1656 'uploader': 'The Cinematic Orchestra - Topic',
1657 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1658 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1659 'artist': 'The Cinematic Orchestra',
1660 'track': 'Burn Out',
1661 'album': 'Every Day',
1662 'release_data': None,
1663 'release_year': None,
1664 },
1665 'params': {
1666 'skip_download': True,
1667 },
1668 },
1669 {
1670 # controversial video, only works with bpctr when authenticated with cookies
1671 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1672 'only_matching': True,
1673 },
1674 {
1675 # controversial video, requires bpctr/contentCheckOk
1676 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1677 'info_dict': {
1678 'id': 'SZJvDhaSDnc',
1679 'ext': 'mp4',
1680 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1681 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1682 'uploader': 'CBS This Morning',
1683 'uploader_id': 'CBSThisMorning',
1684 'upload_date': '20140716',
1685 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1686 }
1687 },
1688 {
1689 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1690 'url': 'cBvYw8_A0vQ',
1691 'info_dict': {
1692 'id': 'cBvYw8_A0vQ',
1693 'ext': 'mp4',
1694 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1695 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1696 'upload_date': '20201120',
1697 'uploader': 'Walk around Japan',
1698 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1699 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1700 },
1701 'params': {
1702 'skip_download': True,
1703 },
1704 }, {
1705 # Has multiple audio streams
1706 'url': 'WaOKSUlf4TM',
1707 'only_matching': True
1708 }, {
1709 # Requires Premium: has format 141 when requested using YTM url
1710 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1711 'only_matching': True
1712 }, {
1713 # multiple subtitles with same lang_code
1714 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1715 'only_matching': True,
1716 }, {
1717 # Force use android client fallback
1718 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1719 'info_dict': {
1720 'id': 'YOelRv7fMxY',
1721 'title': 'DIGGING A SECRET TUNNEL Part 1',
1722 'ext': '3gp',
1723 'upload_date': '20210624',
1724 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1725 'uploader': 'colinfurze',
1726 'uploader_id': 'colinfurze',
1727 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1728 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1729 },
1730 'params': {
1731 'format': '17', # 3gp format available on android
1732 'extractor_args': {'youtube': {'player_client': ['android']}},
1733 },
1734 },
1735 {
1736 # Skip download of additional client configs (remix client config in this case)
1737 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1738 'only_matching': True,
1739 'params': {
1740 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1741 },
1742 }
1743 ]
1744
1745 @classmethod
1746 def suitable(cls, url):
1747 # Hack for lazy extractors until more generic solution is implemented
1748 # (see #28780)
1749 from .youtube import parse_qs
1750 qs = parse_qs(url)
1751 if qs.get('list', [None])[0]:
1752 return False
1753 return super(YoutubeIE, cls).suitable(url)
1754
1755 def __init__(self, *args, **kwargs):
1756 super(YoutubeIE, self).__init__(*args, **kwargs)
1757 self._code_cache = {}
1758 self._player_cache = {}
1759
1760 def _extract_player_url(self, ytcfg=None, webpage=None):
1761 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1762 if not player_url and webpage:
1763 player_url = self._search_regex(
1764 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1765 webpage, 'player URL', fatal=False)
1766 if not player_url:
1767 return None
1768 if player_url.startswith('//'):
1769 player_url = 'https:' + player_url
1770 elif not re.match(r'https?://', player_url):
1771 player_url = compat_urlparse.urljoin(
1772 'https://www.youtube.com', player_url)
1773 return player_url
1774
1775 def _signature_cache_id(self, example_sig):
1776 """ Return a string representation of a signature """
1777 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1778
1779 @classmethod
1780 def _extract_player_info(cls, player_url):
1781 for player_re in cls._PLAYER_INFO_RE:
1782 id_m = re.search(player_re, player_url)
1783 if id_m:
1784 break
1785 else:
1786 raise ExtractorError('Cannot identify player %r' % player_url)
1787 return id_m.group('id')
1788
1789 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1790 player_id = self._extract_player_info(player_url)
1791 if player_id not in self._code_cache:
1792 self._code_cache[player_id] = self._download_webpage(
1793 player_url, video_id, fatal=fatal,
1794 note='Downloading player ' + player_id,
1795 errnote='Download of %s failed' % player_url)
1796 return player_id in self._code_cache
1797
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Return a callable decrypting signatures with example_sig's shape.

        The derived transformation is cached on disk keyed by player id plus
        signature shape. Returns None (implicitly) if the player JS cannot
        be loaded.
        """
        player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = 'js_%s_%s' % (
            player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec maps output position -> input index
            return lambda s: ''.join(s[i] for i in cache_spec)

        if self._load_player(video_id, player_url):
            code = self._code_cache[player_id]
            res = self._parse_sig_js(code)

            # Recover the index mapping by running the JS function on a probe
            # string of unique characters, then persist it for next time
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

            self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
            return res
1820
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Probes func with a string of unique characters to recover the index
        permutation, then renders consecutive runs as compact slice
        expressions (used by --youtube-print-sig-code).
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a run of indices as a Python slice expression over s
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Currently inside a run: extend it, or flush it as a slice
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Adjacent indices open a new run with step +/-1
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1859
    def _parse_sig_js(self, jscode):
        """Locate the signature-decryption function in player JS and wrap it.

        The patterns are tried in order (newest first); returns a Python
        callable that invokes the function through JSInterpreter.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        return lambda s: initial_function([s])
1883
1884 def _decrypt_signature(self, s, video_id, player_url):
1885 """Turn the encrypted s field into a working signature"""
1886
1887 if player_url is None:
1888 raise ExtractorError('Cannot decrypt signature without player_url')
1889
1890 try:
1891 player_id = (player_url, self._signature_cache_id(s))
1892 if player_id not in self._player_cache:
1893 func = self._extract_signature_function(
1894 video_id, player_url, s
1895 )
1896 self._player_cache[player_id] = func
1897 func = self._player_cache[player_id]
1898 if self.get_param('youtube_print_sig_code'):
1899 self._print_sig_code(func, s)
1900 return func(s)
1901 except Exception as e:
1902 tb = traceback.format_exc()
1903 raise ExtractorError(
1904 'Signature extraction failed: ' + tb, cause=e)
1905
1906 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1907 """
1908 Extract signatureTimestamp (sts)
1909 Required to tell API what sig/player version is in use.
1910 """
1911 sts = None
1912 if isinstance(ytcfg, dict):
1913 sts = int_or_none(ytcfg.get('STS'))
1914
1915 if not sts:
1916 # Attempt to extract from player
1917 if player_url is None:
1918 error_msg = 'Cannot extract signature timestamp without player_url.'
1919 if fatal:
1920 raise ExtractorError(error_msg)
1921 self.report_warning(error_msg)
1922 return
1923 if self._load_player(video_id, player_url, fatal=fatal):
1924 player_id = self._extract_player_info(player_url)
1925 code = self._code_cache[player_id]
1926 sts = int_or_none(self._search_regex(
1927 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1928 'JS player signature timestamp', group='sts', fatal=fatal))
1929 return sts
1930
1931 def _mark_watched(self, video_id, player_responses):
1932 playback_url = traverse_obj(
1933 player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
1934 expected_type=url_or_none, get_all=False)
1935 if not playback_url:
1936 self.report_warning('Unable to mark watched')
1937 return
1938 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1939 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1940
1941 # cpn generation algorithm is reverse engineered from base.js.
1942 # In fact it works even with dummy cpn.
1943 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1944 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1945
1946 qs.update({
1947 'ver': ['2'],
1948 'cpn': [cpn],
1949 })
1950 playback_url = compat_urlparse.urlunparse(
1951 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1952
1953 self._download_webpage(
1954 playback_url, video_id, 'Marking watched',
1955 'Unable to mark watched', fatal=False)
1956
1957 @staticmethod
1958 def _extract_urls(webpage):
1959 # Embedded YouTube player
1960 entries = [
1961 unescapeHTML(mobj.group('url'))
1962 for mobj in re.finditer(r'''(?x)
1963 (?:
1964 <iframe[^>]+?src=|
1965 data-video-url=|
1966 <embed[^>]+?src=|
1967 embedSWF\(?:\s*|
1968 <object[^>]+data=|
1969 new\s+SWFObject\(
1970 )
1971 (["\'])
1972 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1973 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1974 \1''', webpage)]
1975
1976 # lazyYT YouTube embed
1977 entries.extend(list(map(
1978 unescapeHTML,
1979 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1980
1981 # Wordpress "YouTube Video Importer" plugin
1982 matches = re.findall(r'''(?x)<div[^>]+
1983 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1984 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1985 entries.extend(m[-1] for m in matches)
1986
1987 return entries
1988
1989 @staticmethod
1990 def _extract_url(webpage):
1991 urls = YoutubeIE._extract_urls(webpage)
1992 return urls[0] if urls else None
1993
1994 @classmethod
1995 def extract_id(cls, url):
1996 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1997 if mobj is None:
1998 raise ExtractorError('Invalid URL: %s' % url)
1999 video_id = mobj.group(2)
2000 return video_id
2001
2002 def _extract_chapters_from_json(self, data, duration):
2003 chapter_list = traverse_obj(
2004 data, (
2005 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2006 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2007 ), expected_type=list)
2008
2009 return self._extract_chapters(
2010 chapter_list,
2011 chapter_time=lambda chapter: float_or_none(
2012 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2013 chapter_title=lambda chapter: traverse_obj(
2014 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2015 duration=duration)
2016
2017 def _extract_chapters_from_engagement_panel(self, data, duration):
2018 content_list = traverse_obj(
2019 data,
2020 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2021 expected_type=list, default=[])
2022 chapter_time = lambda chapter: parse_duration(self._get_text(chapter.get('timeDescription')))
2023 chapter_title = lambda chapter: self._get_text(chapter.get('title'))
2024
2025 return next((
2026 filter(None, (
2027 self._extract_chapters(
2028 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2029 chapter_time, chapter_title, duration)
2030 for contents in content_list
2031 ))), [])
2032
2033 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2034 chapters = []
2035 last_chapter = {'start_time': 0}
2036 for idx, chapter in enumerate(chapter_list or []):
2037 title = chapter_title(chapter)
2038 start_time = chapter_time(chapter)
2039 if start_time is None:
2040 continue
2041 last_chapter['end_time'] = start_time
2042 if start_time < last_chapter['start_time']:
2043 if idx == 1:
2044 chapters.pop()
2045 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2046 else:
2047 self.report_warning(f'Invalid start time for chapter "{title}"')
2048 continue
2049 last_chapter = {'start_time': start_time, 'title': title}
2050 chapters.append(last_chapter)
2051 last_chapter['end_time'] = duration
2052 return chapters
2053
2054 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2055 return self._parse_json(self._search_regex(
2056 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2057 regex), webpage, name, default='{}'), video_id, fatal=False)
2058
2059 @staticmethod
2060 def parse_time_text(time_text):
2061 """
2062 Parse the comment time text
2063 time_text is in the format 'X units ago (edited)'
2064 """
2065 time_text_split = time_text.split(' ')
2066 if len(time_text_split) >= 3:
2067 try:
2068 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2069 except ValueError:
2070 return None
2071
2072 def _extract_comment(self, comment_renderer, parent=None):
2073 comment_id = comment_renderer.get('commentId')
2074 if not comment_id:
2075 return
2076
2077 text = self._get_text(comment_renderer.get('contentText'))
2078
2079 # note: timestamp is an estimate calculated from the current time and time_text
2080 time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
2081 time_text_dt = self.parse_time_text(time_text)
2082 if isinstance(time_text_dt, datetime.datetime):
2083 timestamp = calendar.timegm(time_text_dt.timetuple())
2084 author = self._get_text(comment_renderer.get('authorText'))
2085 author_id = try_get(comment_renderer,
2086 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2087
2088 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2089 lambda x: x['likeCount']), compat_str)) or 0
2090 author_thumbnail = try_get(comment_renderer,
2091 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2092
2093 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2094 is_favorited = 'creatorHeart' in (try_get(
2095 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2096 return {
2097 'id': comment_id,
2098 'text': text,
2099 'timestamp': timestamp,
2100 'time_text': time_text,
2101 'like_count': votes,
2102 'is_favorited': is_favorited,
2103 'author': author,
2104 'author_id': author_id,
2105 'author_thumbnail': author_thumbnail,
2106 'author_is_uploader': author_is_uploader,
2107 'parent': parent or 'root'
2108 }
2109
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """Yield comment info dicts for a video, paging through continuations.

        On the first page the estimated total comment count is yielded once
        as a bare int before any comment dicts. Recurses into reply threads
        (max depth 2) with itself as parent. comment_counts is mutable state
        shared across recursive calls:
        [comments yielded so far, estimated total, current reply-thread index].
        """

        def extract_header(contents):
            # Locate the comments header to read the estimated total count and
            # the continuation token for the requested sort order
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment in the thread, then recurse into its replies
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through continuations until none remain
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry visitorData across requests so paging stays consistent
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2280
2281 @staticmethod
2282 def _generate_comment_continuation(video_id):
2283 """
2284 Generates initial comment section continuation token from given video id
2285 """
2286 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2287 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2288 new_continuation_intlist = list(itertools.chain.from_iterable(
2289 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2290 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2291
2292 def _extract_comments(self, ytcfg, video_id, contents, webpage):
2293 """Entry for comment extraction"""
2294 def _real_comment_extract(contents):
2295 if isinstance(contents, list):
2296 for entry in contents:
2297 for key, renderer in entry.items():
2298 if key not in known_entry_comment_renderers:
2299 continue
2300 yield from self._comment_entries(
2301 renderer, video_id=video_id, ytcfg=ytcfg,
2302 identity_token=self._extract_identity_token(webpage, item_id=video_id),
2303 account_syncid=self._extract_account_syncid(ytcfg))
2304 break
2305 comments = []
2306 known_entry_comment_renderers = ('itemSectionRenderer',)
2307 estimated_total = 0
2308 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
2309
2310 try:
2311 for comment in _real_comment_extract(contents):
2312 if len(comments) >= max_comments:
2313 break
2314 if isinstance(comment, int):
2315 estimated_total = comment
2316 continue
2317 comments.append(comment)
2318 except KeyboardInterrupt:
2319 self.to_screen('Interrupted by user')
2320 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
2321 return {
2322 'comments': comments,
2323 'comment_count': len(comments),
2324 }
2325
2326 @staticmethod
2327 def _generate_player_context(sts=None):
2328 context = {
2329 'html5Preference': 'HTML5_PREF_WANTS',
2330 }
2331 if sts is not None:
2332 context['signatureTimestamp'] = sts
2333 return {
2334 'playbackContext': {
2335 'contentPlaybackContext': context
2336 },
2337 'contentCheckOk': True
2338 }
2339
2340 @staticmethod
2341 def _get_video_info_params(video_id, client='TVHTML5'):
2342 GVI_CLIENTS = {
2343 'ANDROID': {
2344 'c': 'ANDROID',
2345 'cver': '16.20',
2346 },
2347 'TVHTML5': {
2348 'c': 'TVHTML5',
2349 'cver': '6.20180913',
2350 },
2351 'IOS': {
2352 'c': 'IOS',
2353 'cver': '16.20'
2354 }
2355 }
2356 query = {
2357 'video_id': video_id,
2358 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2359 'html5': '1'
2360 }
2361 query.update(GVI_CLIENTS.get(client))
2362 return query
2363
2364 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
2365
2366 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2367 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2368 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
2369 headers = self.generate_api_headers(
2370 player_ytcfg, identity_token, syncid,
2371 default_client=self._YT_CLIENTS[client], session_index=session_index)
2372
2373 yt_query = {'videoId': video_id}
2374 yt_query.update(self._generate_player_context(sts))
2375 return self._extract_response(
2376 item_id=video_id, ep='player', query=yt_query,
2377 ytcfg=player_ytcfg, headers=headers, fatal=False,
2378 default_client=self._YT_CLIENTS[client],
2379 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2380 ) or None
2381
    def _extract_age_gated_player_response(self, client, video_id, ytcfg, identity_token, player_url, initial_pr):
        """Try to obtain a player response for an age-restricted video.

        First queries get_video_info with the client's '_<client>_agegate'
        variant; if that yields nothing, falls back to the embedded player
        config. Returns None when the client has no agegate variant or the
        embed config also reports an age-gate reason.
        """
        gvi_client = self._YT_CLIENTS.get(f'_{client}_agegate')
        if not gvi_client:
            return

        pr = self._parse_json(traverse_obj(
            compat_parse_qs(self._download_webpage(
                self.http_scheme() + '//www.youtube.com/get_video_info', video_id,
                'Refetching age-gated %s info webpage' % gvi_client.lower(),
                'unable to download video info webpage', fatal=False,
                query=self._get_video_info_params(video_id, client=gvi_client))),
            ('player_response', 0), expected_type=str) or '{}', video_id)
        if pr:
            return pr

        self.report_warning('Falling back to embedded-only age-gate workaround')
        embed_webpage = None
        if client == 'web' and 'configs' not in self._configuration_arg('player_skip'):
            embed_webpage = self._download_webpage(
                'https://www.youtube.com/embed/%s?html5=1' % video_id,
                video_id=video_id, note=f'Downloading age-gated {client} embed config')

        ytcfg_age = self.extract_ytcfg(video_id, embed_webpage) or {}
        # If we extracted the embed webpage, it'll tell us if we can view the video
        embedded_pr = self._parse_json(
            traverse_obj(ytcfg_age, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str) or '{}',
            video_id=video_id)
        embedded_ps_reason = traverse_obj(embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
        if embedded_ps_reason in self._AGE_GATE_REASONS:
            return
        return self._extract_player_response(
            f'_{client}_embedded', video_id,
            ytcfg_age or ytcfg, ytcfg_age if client == 'web' else {},
            identity_token, player_url, initial_pr)
2416
2417 def _get_requested_clients(self, url, smuggled_data):
2418 requested_clients = [client for client in self._configuration_arg('player_client')
2419 if client[:0] != '_' and client in self._YT_CLIENTS]
2420 if not requested_clients:
2421 requested_clients = ['android', 'web']
2422
2423 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2424 requested_clients.extend(
2425 f'{client}_music' for client in requested_clients if not client.endswith('_music'))
2426
2427 return orderedSet(requested_clients)
2428
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
        """Yield a player response per requested client, handling age-gating.

        Once any response reports an age-gate reason, subsequent clients go
        straight to the age-gate workaround. The web client reuses the
        response embedded in the watch page when available.
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        age_gated = False
        for client in clients:
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if age_gated:
                pr = None
            elif client == 'web' and initial_pr:
                # Reuse the player response already present in the watch page
                pr = initial_pr
            else:
                if client == 'web_music' and 'configs' not in self._configuration_arg('player_skip'):
                    ytm_webpage = self._download_webpage(
                        'https://music.youtube.com',
                        video_id, fatal=False, note='Downloading remix client config')
                    player_ytcfg = self.extract_ytcfg(video_id, ytm_webpage) or {}
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
            if pr:
                yield pr
            if age_gated or traverse_obj(pr, ('playabilityStatus', 'reason')) in self._AGE_GATE_REASONS:
                age_gated = True
                pr = self._extract_age_gated_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, identity_token, player_url, initial_pr)
                if pr:
                    yield pr
        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        if initial_pr and 'web' not in clients:
            initial_pr['streamingData'] = None
            yield initial_pr
2466
2467 def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2468 itags, stream_ids = [], []
2469 itag_qualities = {}
2470 q = qualities([
2471 # "tiny" is the smallest video-only format. But some audio-only formats
2472 # was also labeled "tiny". It is not clear if such formats still exist
2473 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2474 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2475 ])
2476 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2477
2478 for fmt in streaming_formats:
2479 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2480 continue
2481
2482 itag = str_or_none(fmt.get('itag'))
2483 audio_track = fmt.get('audioTrack') or {}
2484 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2485 if stream_id in stream_ids:
2486 continue
2487
2488 quality = fmt.get('quality')
2489 if quality == 'tiny' or not quality:
2490 quality = fmt.get('audioQuality', '').lower() or quality
2491 if itag and quality:
2492 itag_qualities[itag] = quality
2493 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2494 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2495 # number of fragment that would subsequently requested with (`&sq=N`)
2496 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2497 continue
2498
2499 fmt_url = fmt.get('url')
2500 if not fmt_url:
2501 sc = compat_parse_qs(fmt.get('signatureCipher'))
2502 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2503 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2504 if not (sc and fmt_url and encrypted_sig):
2505 continue
2506 if not player_url:
2507 continue
2508 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2509 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2510 fmt_url += '&' + sp + '=' + signature
2511
2512 if itag:
2513 itags.append(itag)
2514 stream_ids.append(stream_id)
2515
2516 tbr = float_or_none(
2517 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2518 dct = {
2519 'asr': int_or_none(fmt.get('audioSampleRate')),
2520 'filesize': int_or_none(fmt.get('contentLength')),
2521 'format_id': itag,
2522 'format_note': ', '.join(filter(None, (
2523 audio_track.get('displayName'), fmt.get('qualityLabel') or quality))),
2524 'fps': int_or_none(fmt.get('fps')),
2525 'height': int_or_none(fmt.get('height')),
2526 'quality': q(quality),
2527 'tbr': tbr,
2528 'url': fmt_url,
2529 'width': fmt.get('width'),
2530 'language': audio_track.get('id', '').split('.')[0],
2531 }
2532 mime_mobj = re.match(
2533 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2534 if mime_mobj:
2535 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2536 dct.update(parse_codecs(mime_mobj.group(2)))
2537 # The 3gp format in android client has a quality of "small",
2538 # but is actually worse than all other formats
2539 if dct['ext'] == '3gp':
2540 dct['quality'] = q('tiny')
2541 dct['preference'] = -10
2542 no_audio = dct.get('acodec') == 'none'
2543 no_video = dct.get('vcodec') == 'none'
2544 if no_audio:
2545 dct['vbr'] = tbr
2546 if no_video:
2547 dct['abr'] = tbr
2548 if no_audio or no_video:
2549 dct['downloader_options'] = {
2550 # Youtube throttles chunks >~10M
2551 'http_chunk_size': 10485760,
2552 }
2553 if dct.get('ext'):
2554 dct['container'] = dct['ext'] + '_dash'
2555 yield dct
2556
2557 skip_manifests = self._configuration_arg('skip')
2558 get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2559 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2560
2561 for sd in streaming_data:
2562 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2563 if hls_manifest_url:
2564 for f in self._extract_m3u8_formats(
2565 hls_manifest_url, video_id, 'mp4', fatal=False):
2566 itag = self._search_regex(
2567 r'/itag/(\d+)', f['url'], 'itag', default=None)
2568 if itag in itags:
2569 continue
2570 if itag:
2571 f['format_id'] = itag
2572 itags.append(itag)
2573 yield f
2574
2575 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2576 if dash_manifest_url:
2577 for f in self._extract_mpd_formats(
2578 dash_manifest_url, video_id, fatal=False):
2579 itag = f['format_id']
2580 if itag in itags:
2581 continue
2582 if itag:
2583 itags.append(itag)
2584 if itag in itag_qualities:
2585 f['quality'] = q(itag_qualities[itag])
2586 filesize = int_or_none(self._search_regex(
2587 r'/clen/(\d+)', f.get('fragment_base_url')
2588 or f['url'], 'file size', default=None))
2589 if filesize:
2590 f['filesize'] = filesize
2591 yield f
2592
2593 def _real_extract(self, url):
2594 url, smuggled_data = unsmuggle_url(url, {})
2595 video_id = self._match_id(url)
2596
2597 base_url = self.http_scheme() + '//www.youtube.com/'
2598 webpage_url = base_url + 'watch?v=' + video_id
2599 webpage = self._download_webpage(
2600 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2601
2602 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2603 player_url = self._extract_player_url(master_ytcfg, webpage)
2604 identity_token = self._extract_identity_token(webpage, video_id)
2605
2606 player_responses = list(self._extract_player_responses(
2607 self._get_requested_clients(url, smuggled_data),
2608 video_id, webpage, master_ytcfg, player_url, identity_token))
2609
2610 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2611
2612 playability_statuses = traverse_obj(
2613 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2614
2615 trailer_video_id = get_first(
2616 playability_statuses,
2617 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2618 expected_type=str)
2619 if trailer_video_id:
2620 return self.url_result(
2621 trailer_video_id, self.ie_key(), trailer_video_id)
2622
2623 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2624 if webpage else (lambda x: None))
2625
2626 video_details = traverse_obj(
2627 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2628 microformats = traverse_obj(
2629 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2630 expected_type=dict, default=[])
2631 video_title = (
2632 get_first(video_details, 'title')
2633 or self._get_text(microformats, (..., 'title'))
2634 or search_meta(['og:title', 'twitter:title', 'title']))
2635 video_description = get_first(video_details, 'shortDescription')
2636
2637 if not smuggled_data.get('force_singlefeed', False):
2638 if not self.get_param('noplaylist'):
2639 multifeed_metadata_list = get_first(
2640 player_responses,
2641 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2642 expected_type=str)
2643 if multifeed_metadata_list:
2644 entries = []
2645 feed_ids = []
2646 for feed in multifeed_metadata_list.split(','):
2647 # Unquote should take place before split on comma (,) since textual
2648 # fields may contain comma as well (see
2649 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2650 feed_data = compat_parse_qs(
2651 compat_urllib_parse_unquote_plus(feed))
2652
2653 def feed_entry(name):
2654 return try_get(
2655 feed_data, lambda x: x[name][0], compat_str)
2656
2657 feed_id = feed_entry('id')
2658 if not feed_id:
2659 continue
2660 feed_title = feed_entry('title')
2661 title = video_title
2662 if feed_title:
2663 title += ' (%s)' % feed_title
2664 entries.append({
2665 '_type': 'url_transparent',
2666 'ie_key': 'Youtube',
2667 'url': smuggle_url(
2668 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2669 {'force_singlefeed': True}),
2670 'title': title,
2671 })
2672 feed_ids.append(feed_id)
2673 self.to_screen(
2674 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2675 % (', '.join(feed_ids), video_id))
2676 return self.playlist_result(
2677 entries, video_id, video_title, video_description)
2678 else:
2679 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2680
2681 category = get_first(microformats, 'category') or search_meta('genre')
2682 channel_id = get_first(video_details, 'channelId') \
2683 or get_first(microformats, 'externalChannelId') \
2684 or search_meta('channelId')
2685 duration = int_or_none(
2686 get_first(video_details, 'lengthSeconds')
2687 or get_first(microformats, 'lengthSeconds')) \
2688 or parse_duration(search_meta('duration'))
2689 is_live = get_first(video_details, 'isLive')
2690 is_upcoming = get_first(video_details, 'isUpcoming')
2691 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2692
2693 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2694 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2695
2696 if not formats:
2697 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2698 self.raise_no_formats(
2699 'This video is DRM protected.', expected=True)
2700 pemr = get_first(
2701 playability_statuses,
2702 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2703 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2704 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2705 if subreason:
2706 if subreason == 'The uploader has not made this video available in your country.':
2707 countries = get_first(microformats, 'availableCountries')
2708 if not countries:
2709 regions_allowed = search_meta('regionsAllowed')
2710 countries = regions_allowed.split(',') if regions_allowed else None
2711 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2712 reason += f'. {subreason}'
2713 if reason:
2714 self.raise_no_formats(reason, expected=True)
2715
2716 for f in formats:
2717 # TODO: detect if throttled
2718 if '&n=' in f['url']: # possibly throttled
2719 f['source_preference'] = -10
2720 # note = f.get('format_note')
2721 # f['format_note'] = f'{note} (throttled)' if note else '(throttled)'
2722
2723 self._sort_formats(formats)
2724
2725 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2726 if not keywords and webpage:
2727 keywords = [
2728 unescapeHTML(m.group('content'))
2729 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2730 for keyword in keywords:
2731 if keyword.startswith('yt:stretch='):
2732 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2733 if mobj:
2734 # NB: float is intentional for forcing float division
2735 w, h = (float(v) for v in mobj.groups())
2736 if w > 0 and h > 0:
2737 ratio = w / h
2738 for f in formats:
2739 if f.get('vcodec') != 'none':
2740 f['stretched_ratio'] = ratio
2741 break
2742
2743 thumbnails = []
2744 thumbnail_dicts = traverse_obj(
2745 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2746 expected_type=dict, default=[])
2747 for thumbnail in thumbnail_dicts:
2748 thumbnail_url = thumbnail.get('url')
2749 if not thumbnail_url:
2750 continue
2751 # Sometimes youtube gives a wrong thumbnail URL. See:
2752 # https://github.com/yt-dlp/yt-dlp/issues/233
2753 # https://github.com/ytdl-org/youtube-dl/issues/28023
2754 if 'maxresdefault' in thumbnail_url:
2755 thumbnail_url = thumbnail_url.split('?')[0]
2756 thumbnails.append({
2757 'url': thumbnail_url,
2758 'height': int_or_none(thumbnail.get('height')),
2759 'width': int_or_none(thumbnail.get('width')),
2760 })
2761 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2762 if thumbnail_url:
2763 thumbnails.append({
2764 'url': thumbnail_url,
2765 })
2766 # The best resolution thumbnails sometimes does not appear in the webpage
2767 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2768 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2769 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2770 guaranteed_thumbnail_names = [
2771 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2772 'mqdefault', 'mq1', 'mq2', 'mq3',
2773 'default', '1', '2', '3'
2774 ]
2775 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2776 n_thumbnail_names = len(thumbnail_names)
2777
2778 thumbnails.extend({
2779 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2780 video_id=video_id, name=name, ext=ext,
2781 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2782 '_test_url': name in hq_thumbnail_names,
2783 } for name in thumbnail_names for ext in ('webp', 'jpg'))
2784 for thumb in thumbnails:
2785 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2786 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2787 self._remove_duplicate_formats(thumbnails)
2788
2789 info = {
2790 'id': video_id,
2791 'title': self._live_title(video_title) if is_live else video_title,
2792 'formats': formats,
2793 'thumbnails': thumbnails,
2794 'description': video_description,
2795 'upload_date': unified_strdate(
2796 get_first(microformats, 'uploadDate')
2797 or search_meta('uploadDate')),
2798 'uploader': get_first(video_details, 'author'),
2799 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2800 'uploader_url': owner_profile_url,
2801 'channel_id': channel_id,
2802 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2803 'duration': duration,
2804 'view_count': int_or_none(
2805 get_first((video_details, microformats), (..., 'viewCount'))
2806 or search_meta('interactionCount')),
2807 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2808 'age_limit': 18 if (
2809 get_first(microformats, 'isFamilySafe') is False
2810 or search_meta('isFamilyFriendly') == 'false'
2811 or search_meta('og:restrictions:age') == '18+') else 0,
2812 'webpage_url': webpage_url,
2813 'categories': [category] if category else None,
2814 'tags': keywords,
2815 'is_live': is_live,
2816 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2817 'was_live': get_first(video_details, 'isLiveContent'),
2818 }
2819
2820 pctr = get_first(player_responses, ('captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2821 subtitles = {}
2822 if pctr:
2823 def process_language(container, base_url, lang_code, sub_name, query):
2824 lang_subs = container.setdefault(lang_code, [])
2825 for fmt in self._SUBTITLE_FORMATS:
2826 query.update({
2827 'fmt': fmt,
2828 })
2829 lang_subs.append({
2830 'ext': fmt,
2831 'url': update_url_query(base_url, query),
2832 'name': sub_name,
2833 })
2834
2835 for caption_track in (pctr.get('captionTracks') or []):
2836 base_url = caption_track.get('baseUrl')
2837 if not base_url:
2838 continue
2839 if caption_track.get('kind') != 'asr':
2840 lang_code = (
2841 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2842 or caption_track.get('languageCode'))
2843 if not lang_code:
2844 continue
2845 process_language(
2846 subtitles, base_url, lang_code,
2847 try_get(caption_track, lambda x: x['name']['simpleText']),
2848 {})
2849 continue
2850 automatic_captions = {}
2851 for translation_language in (pctr.get('translationLanguages') or []):
2852 translation_language_code = translation_language.get('languageCode')
2853 if not translation_language_code:
2854 continue
2855 process_language(
2856 automatic_captions, base_url, translation_language_code,
2857 self._get_text(translation_language.get('languageName'), max_runs=1),
2858 {'tlang': translation_language_code})
2859 info['automatic_captions'] = automatic_captions
2860 info['subtitles'] = subtitles
2861
2862 parsed_url = compat_urllib_parse_urlparse(url)
2863 for component in [parsed_url.fragment, parsed_url.query]:
2864 query = compat_parse_qs(component)
2865 for k, v in query.items():
2866 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2867 d_k += '_time'
2868 if d_k not in info and k in s_ks:
2869 info[d_k] = parse_duration(query[k][0])
2870
2871 # Youtube Music Auto-generated description
2872 if video_description:
2873 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2874 if mobj:
2875 release_year = mobj.group('release_year')
2876 release_date = mobj.group('release_date')
2877 if release_date:
2878 release_date = release_date.replace('-', '')
2879 if not release_year:
2880 release_year = release_date[:4]
2881 info.update({
2882 'album': mobj.group('album'.strip()),
2883 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2884 'track': mobj.group('track').strip(),
2885 'release_date': release_date,
2886 'release_year': int_or_none(release_year),
2887 })
2888
2889 initial_data = None
2890 if webpage:
2891 initial_data = self._extract_yt_initial_variable(
2892 webpage, self._YT_INITIAL_DATA_RE, video_id,
2893 'yt initial data')
2894 if not initial_data:
2895 headers = self.generate_api_headers(
2896 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
2897 session_index=self._extract_session_index(master_ytcfg))
2898
2899 initial_data = self._extract_response(
2900 item_id=video_id, ep='next', fatal=False,
2901 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
2902 note='Downloading initial data API JSON')
2903
2904 try:
2905 # This will error if there is no livechat
2906 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2907 info['subtitles']['live_chat'] = [{
2908 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2909 'video_id': video_id,
2910 'ext': 'json',
2911 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2912 }]
2913 except (KeyError, IndexError, TypeError):
2914 pass
2915
2916 if initial_data:
2917 info['chapters'] = (
2918 self._extract_chapters_from_json(initial_data, duration)
2919 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2920 or None)
2921
2922 contents = try_get(
2923 initial_data,
2924 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2925 list) or []
2926 for content in contents:
2927 vpir = content.get('videoPrimaryInfoRenderer')
2928 if vpir:
2929 stl = vpir.get('superTitleLink')
2930 if stl:
2931 stl = self._get_text(stl)
2932 if try_get(
2933 vpir,
2934 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2935 info['location'] = stl
2936 else:
2937 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2938 if mobj:
2939 info.update({
2940 'series': mobj.group(1),
2941 'season_number': int(mobj.group(2)),
2942 'episode_number': int(mobj.group(3)),
2943 })
2944 for tlb in (try_get(
2945 vpir,
2946 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2947 list) or []):
2948 tbr = tlb.get('toggleButtonRenderer') or {}
2949 for getter, regex in [(
2950 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2951 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2952 lambda x: x['accessibility'],
2953 lambda x: x['accessibilityData']['accessibilityData'],
2954 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2955 label = (try_get(tbr, getter, dict) or {}).get('label')
2956 if label:
2957 mobj = re.match(regex, label)
2958 if mobj:
2959 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2960 break
2961 sbr_tooltip = try_get(
2962 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2963 if sbr_tooltip:
2964 like_count, dislike_count = sbr_tooltip.split(' / ')
2965 info.update({
2966 'like_count': str_to_int(like_count),
2967 'dislike_count': str_to_int(dislike_count),
2968 })
2969 vsir = content.get('videoSecondaryInfoRenderer')
2970 if vsir:
2971 info['channel'] = self._get_text(try_get(
2972 vsir,
2973 lambda x: x['owner']['videoOwnerRenderer']['title'],
2974 dict))
2975 rows = try_get(
2976 vsir,
2977 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2978 list) or []
2979 multiple_songs = False
2980 for row in rows:
2981 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2982 multiple_songs = True
2983 break
2984 for row in rows:
2985 mrr = row.get('metadataRowRenderer') or {}
2986 mrr_title = mrr.get('title')
2987 if not mrr_title:
2988 continue
2989 mrr_title = self._get_text(mrr['title'])
2990 mrr_contents_text = self._get_text(mrr['contents'][0])
2991 if mrr_title == 'License':
2992 info['license'] = mrr_contents_text
2993 elif not multiple_songs:
2994 if mrr_title == 'Album':
2995 info['album'] = mrr_contents_text
2996 elif mrr_title == 'Artist':
2997 info['artist'] = mrr_contents_text
2998 elif mrr_title == 'Song':
2999 info['track'] = mrr_contents_text
3000
3001 fallbacks = {
3002 'channel': 'uploader',
3003 'channel_id': 'uploader_id',
3004 'channel_url': 'uploader_url',
3005 }
3006 for to, frm in fallbacks.items():
3007 if not info.get(to):
3008 info[to] = info.get(frm)
3009
3010 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3011 v = info.get(s_k)
3012 if v:
3013 info[d_k] = v
3014
3015 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3016 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3017 is_membersonly = None
3018 is_premium = None
3019 if initial_data and is_private is not None:
3020 is_membersonly = False
3021 is_premium = False
3022 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3023 badge_labels = set()
3024 for content in contents:
3025 if not isinstance(content, dict):
3026 continue
3027 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3028 for badge_label in badge_labels:
3029 if badge_label.lower() == 'members only':
3030 is_membersonly = True
3031 elif badge_label.lower() == 'premium':
3032 is_premium = True
3033 elif badge_label.lower() == 'unlisted':
3034 is_unlisted = True
3035
3036 info['availability'] = self._availability(
3037 is_private=is_private,
3038 needs_premium=is_premium,
3039 needs_subscription=is_membersonly,
3040 needs_auth=info['age_limit'] >= 18,
3041 is_unlisted=None if is_private is None else is_unlisted)
3042
3043 # get xsrf for annotations or comments
3044 get_annotations = self.get_param('writeannotations', False)
3045 get_comments = self.get_param('getcomments', False)
3046 if get_annotations or get_comments:
3047 xsrf_token = None
3048 if master_ytcfg:
3049 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
3050 if not xsrf_token:
3051 xsrf_token = self._search_regex(
3052 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
3053 webpage, 'xsrf token', group='xsrf_token', fatal=False)
3054
3055 # annotations
3056 if get_annotations:
3057 invideo_url = get_first(
3058 player_responses,
3059 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3060 expected_type=str)
3061 if xsrf_token and invideo_url:
3062 xsrf_field_name = None
3063 if master_ytcfg:
3064 xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3065 if not xsrf_field_name:
3066 xsrf_field_name = self._search_regex(
3067 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3068 webpage, 'xsrf field name',
3069 group='xsrf_field_name', default='session_token')
3070 info['annotations'] = self._download_webpage(
3071 self._proto_relative_url(invideo_url),
3072 video_id, note='Downloading annotations',
3073 errnote='Unable to download video annotations', fatal=False,
3074 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3075
3076 if get_comments:
3077 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3078
3079 self.mark_watched(video_id, player_responses)
3080
3081 return info
3082
3083
3084 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3085 IE_DESC = 'YouTube.com tab'
3086 _VALID_URL = r'''(?x)
3087 https?://
3088 (?:\w+\.)?
3089 (?:
3090 youtube(?:kids)?\.com|
3091 invidio\.us
3092 )/
3093 (?:
3094 (?P<channel_type>channel|c|user|browse)/|
3095 (?P<not_channel>
3096 feed/|hashtag/|
3097 (?:playlist|watch)\?.*?\blist=
3098 )|
3099 (?!(?:%s)\b) # Direct URLs
3100 )
3101 (?P<id>[^/?\#&]+)
3102 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3103 IE_NAME = 'youtube:tab'
3104
3105 _TESTS = [{
3106 'note': 'playlists, multipage',
3107 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3108 'playlist_mincount': 94,
3109 'info_dict': {
3110 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3111 'title': 'Игорь Клейнер - Playlists',
3112 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3113 'uploader': 'Игорь Клейнер',
3114 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3115 },
3116 }, {
3117 'note': 'playlists, multipage, different order',
3118 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3119 'playlist_mincount': 94,
3120 'info_dict': {
3121 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3122 'title': 'Игорь Клейнер - Playlists',
3123 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3124 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3125 'uploader': 'Игорь Клейнер',
3126 },
3127 }, {
3128 'note': 'playlists, series',
3129 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3130 'playlist_mincount': 5,
3131 'info_dict': {
3132 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3133 'title': '3Blue1Brown - Playlists',
3134 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3135 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3136 'uploader': '3Blue1Brown',
3137 },
3138 }, {
3139 'note': 'playlists, singlepage',
3140 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3141 'playlist_mincount': 4,
3142 'info_dict': {
3143 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3144 'title': 'ThirstForScience - Playlists',
3145 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3146 'uploader': 'ThirstForScience',
3147 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3148 }
3149 }, {
3150 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3151 'only_matching': True,
3152 }, {
3153 'note': 'basic, single video playlist',
3154 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3155 'info_dict': {
3156 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3157 'uploader': 'Sergey M.',
3158 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3159 'title': 'youtube-dl public playlist',
3160 },
3161 'playlist_count': 1,
3162 }, {
3163 'note': 'empty playlist',
3164 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3165 'info_dict': {
3166 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3167 'uploader': 'Sergey M.',
3168 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3169 'title': 'youtube-dl empty playlist',
3170 },
3171 'playlist_count': 0,
3172 }, {
3173 'note': 'Home tab',
3174 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3175 'info_dict': {
3176 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3177 'title': 'lex will - Home',
3178 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3179 'uploader': 'lex will',
3180 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3181 },
3182 'playlist_mincount': 2,
3183 }, {
3184 'note': 'Videos tab',
3185 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3186 'info_dict': {
3187 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3188 'title': 'lex will - Videos',
3189 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3190 'uploader': 'lex will',
3191 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3192 },
3193 'playlist_mincount': 975,
3194 }, {
3195 'note': 'Videos tab, sorted by popular',
3196 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3197 'info_dict': {
3198 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3199 'title': 'lex will - Videos',
3200 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3201 'uploader': 'lex will',
3202 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3203 },
3204 'playlist_mincount': 199,
3205 }, {
3206 'note': 'Playlists tab',
3207 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3208 'info_dict': {
3209 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3210 'title': 'lex will - Playlists',
3211 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3212 'uploader': 'lex will',
3213 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3214 },
3215 'playlist_mincount': 17,
3216 }, {
3217 'note': 'Community tab',
3218 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3219 'info_dict': {
3220 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3221 'title': 'lex will - Community',
3222 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3223 'uploader': 'lex will',
3224 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3225 },
3226 'playlist_mincount': 18,
3227 }, {
3228 'note': 'Channels tab',
3229 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3230 'info_dict': {
3231 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3232 'title': 'lex will - Channels',
3233 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3234 'uploader': 'lex will',
3235 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3236 },
3237 'playlist_mincount': 12,
3238 }, {
3239 'note': 'Search tab',
3240 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3241 'playlist_mincount': 40,
3242 'info_dict': {
3243 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3244 'title': '3Blue1Brown - Search - linear algebra',
3245 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3246 'uploader': '3Blue1Brown',
3247 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3248 },
3249 }, {
3250 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3251 'only_matching': True,
3252 }, {
3253 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3254 'only_matching': True,
3255 }, {
3256 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3257 'only_matching': True,
3258 }, {
3259 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3260 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3261 'info_dict': {
3262 'title': '29C3: Not my department',
3263 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3264 'uploader': 'Christiaan008',
3265 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3266 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3267 },
3268 'playlist_count': 96,
3269 }, {
3270 'note': 'Large playlist',
3271 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3272 'info_dict': {
3273 'title': 'Uploads from Cauchemar',
3274 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3275 'uploader': 'Cauchemar',
3276 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3277 },
3278 'playlist_mincount': 1123,
3279 }, {
3280 'note': 'even larger playlist, 8832 videos',
3281 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3282 'only_matching': True,
3283 }, {
3284 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3285 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3286 'info_dict': {
3287 'title': 'Uploads from Interstellar Movie',
3288 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3289 'uploader': 'Interstellar Movie',
3290 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3291 },
3292 'playlist_mincount': 21,
3293 }, {
3294 'note': 'Playlist with "show unavailable videos" button',
3295 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3296 'info_dict': {
3297 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3298 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3299 'uploader': 'Phim Siêu Nhân Nhật Bản',
3300 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3301 },
3302 'playlist_mincount': 200,
3303 }, {
3304 'note': 'Playlist with unavailable videos in page 7',
3305 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3306 'info_dict': {
3307 'title': 'Uploads from BlankTV',
3308 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3309 'uploader': 'BlankTV',
3310 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3311 },
3312 'playlist_mincount': 1000,
3313 }, {
3314 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3315 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3316 'info_dict': {
3317 'title': 'Data Analysis with Dr Mike Pound',
3318 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3319 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3320 'uploader': 'Computerphile',
3321 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3322 },
3323 'playlist_mincount': 11,
3324 }, {
3325 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3326 'only_matching': True,
3327 }, {
3328 'note': 'Playlist URL that does not actually serve a playlist',
3329 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3330 'info_dict': {
3331 'id': 'FqZTN594JQw',
3332 'ext': 'webm',
3333 'title': "Smiley's People 01 detective, Adventure Series, Action",
3334 'uploader': 'STREEM',
3335 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3336 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3337 'upload_date': '20150526',
3338 'license': 'Standard YouTube License',
3339 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3340 'categories': ['People & Blogs'],
3341 'tags': list,
3342 'view_count': int,
3343 'like_count': int,
3344 'dislike_count': int,
3345 },
3346 'params': {
3347 'skip_download': True,
3348 },
3349 'skip': 'This video is not available.',
3350 'add_ie': [YoutubeIE.ie_key()],
3351 }, {
3352 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3353 'only_matching': True,
3354 }, {
3355 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3356 'only_matching': True,
3357 }, {
3358 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3359 'info_dict': {
3360 'id': 'FMtPN8yp5LU', # This will keep changing
3361 'ext': 'mp4',
3362 'title': compat_str,
3363 'uploader': 'Sky News',
3364 'uploader_id': 'skynews',
3365 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3366 'upload_date': r're:\d{8}',
3367 'description': compat_str,
3368 'categories': ['News & Politics'],
3369 'tags': list,
3370 'like_count': int,
3371 'dislike_count': int,
3372 },
3373 'params': {
3374 'skip_download': True,
3375 },
3376 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3377 }, {
3378 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3379 'info_dict': {
3380 'id': 'a48o2S1cPoo',
3381 'ext': 'mp4',
3382 'title': 'The Young Turks - Live Main Show',
3383 'uploader': 'The Young Turks',
3384 'uploader_id': 'TheYoungTurks',
3385 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3386 'upload_date': '20150715',
3387 'license': 'Standard YouTube License',
3388 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3389 'categories': ['News & Politics'],
3390 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3391 'like_count': int,
3392 'dislike_count': int,
3393 },
3394 'params': {
3395 'skip_download': True,
3396 },
3397 'only_matching': True,
3398 }, {
3399 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3400 'only_matching': True,
3401 }, {
3402 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3403 'only_matching': True,
3404 }, {
3405 'note': 'A channel that is not live. Should raise error',
3406 'url': 'https://www.youtube.com/user/numberphile/live',
3407 'only_matching': True,
3408 }, {
3409 'url': 'https://www.youtube.com/feed/trending',
3410 'only_matching': True,
3411 }, {
3412 'url': 'https://www.youtube.com/feed/library',
3413 'only_matching': True,
3414 }, {
3415 'url': 'https://www.youtube.com/feed/history',
3416 'only_matching': True,
3417 }, {
3418 'url': 'https://www.youtube.com/feed/subscriptions',
3419 'only_matching': True,
3420 }, {
3421 'url': 'https://www.youtube.com/feed/watch_later',
3422 'only_matching': True,
3423 }, {
3424 'note': 'Recommended - redirects to home page',
3425 'url': 'https://www.youtube.com/feed/recommended',
3426 'only_matching': True,
3427 }, {
3428 'note': 'inline playlist with not always working continuations',
3429 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3430 'only_matching': True,
3431 }, {
3432 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3433 'only_matching': True,
3434 }, {
3435 'url': 'https://www.youtube.com/course',
3436 'only_matching': True,
3437 }, {
3438 'url': 'https://www.youtube.com/zsecurity',
3439 'only_matching': True,
3440 }, {
3441 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3442 'only_matching': True,
3443 }, {
3444 'url': 'https://www.youtube.com/TheYoungTurks/live',
3445 'only_matching': True,
3446 }, {
3447 'url': 'https://www.youtube.com/hashtag/cctv9',
3448 'info_dict': {
3449 'id': 'cctv9',
3450 'title': '#cctv9',
3451 },
3452 'playlist_mincount': 350,
3453 }, {
3454 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3455 'only_matching': True,
3456 }, {
3457 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3458 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3459 'only_matching': True
3460 }, {
3461 'note': '/browse/ should redirect to /channel/',
3462 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3463 'only_matching': True
3464 }, {
3465 'note': 'VLPL, should redirect to playlist?list=PL...',
3466 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3467 'info_dict': {
3468 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3469 'uploader': 'NoCopyrightSounds',
3470 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3471 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3472 'title': 'NCS Releases',
3473 },
3474 'playlist_mincount': 166,
3475 }, {
3476 'note': 'Topic, should redirect to playlist?list=UU...',
3477 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3478 'info_dict': {
3479 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3480 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3481 'title': 'Uploads from Royalty Free Music - Topic',
3482 'uploader': 'Royalty Free Music - Topic',
3483 },
3484 'expected_warnings': [
3485 'A channel/user page was given',
3486 'The URL does not have a videos tab',
3487 ],
3488 'playlist_mincount': 101,
3489 }, {
3490 'note': 'Topic without a UU playlist',
3491 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3492 'info_dict': {
3493 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3494 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3495 },
3496 'expected_warnings': [
3497 'A channel/user page was given',
3498 'The URL does not have a videos tab',
3499 'Falling back to channel URL',
3500 ],
3501 'playlist_mincount': 9,
3502 }, {
3503 'note': 'Youtube music Album',
3504 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3505 'info_dict': {
3506 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3507 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3508 },
3509 'playlist_count': 50,
3510 }, {
3511 'note': 'unlisted single video playlist',
3512 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3513 'info_dict': {
3514 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3515 'uploader': 'colethedj',
3516 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3517 'title': 'yt-dlp unlisted playlist test',
3518 'availability': 'unlisted'
3519 },
3520 'playlist_count': 1,
3521 }]
3522
3523 @classmethod
3524 def suitable(cls, url):
3525 return False if YoutubeIE.suitable(url) else super(
3526 YoutubeTabIE, cls).suitable(url)
3527
3528 def _extract_channel_id(self, webpage):
3529 channel_id = self._html_search_meta(
3530 'channelId', webpage, 'channel id', default=None)
3531 if channel_id:
3532 return channel_id
3533 channel_url = self._html_search_meta(
3534 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3535 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3536 'twitter:app:url:googleplay'), webpage, 'channel url')
3537 return self._search_regex(
3538 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3539 channel_url, 'channel id')
3540
3541 @staticmethod
3542 def _extract_basic_item_renderer(item):
3543 # Modified from _extract_grid_item_renderer
3544 known_basic_renderers = (
3545 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3546 )
3547 for key, renderer in item.items():
3548 if not isinstance(renderer, dict):
3549 continue
3550 elif key in known_basic_renderers:
3551 return renderer
3552 elif key.startswith('grid') and key.endswith('Renderer'):
3553 return renderer
3554
3555 def _grid_entries(self, grid_renderer):
3556 for item in grid_renderer['items']:
3557 if not isinstance(item, dict):
3558 continue
3559 renderer = self._extract_basic_item_renderer(item)
3560 if not isinstance(renderer, dict):
3561 continue
3562 title = self._get_text(renderer.get('title'))
3563
3564 # playlist
3565 playlist_id = renderer.get('playlistId')
3566 if playlist_id:
3567 yield self.url_result(
3568 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3569 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3570 video_title=title)
3571 continue
3572 # video
3573 video_id = renderer.get('videoId')
3574 if video_id:
3575 yield self._extract_video(renderer)
3576 continue
3577 # channel
3578 channel_id = renderer.get('channelId')
3579 if channel_id:
3580 yield self.url_result(
3581 'https://www.youtube.com/channel/%s' % channel_id,
3582 ie=YoutubeTabIE.ie_key(), video_title=title)
3583 continue
3584 # generic endpoint URL support
3585 ep_url = urljoin('https://www.youtube.com/', try_get(
3586 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3587 compat_str))
3588 if ep_url:
3589 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3590 if ie.suitable(ep_url):
3591 yield self.url_result(
3592 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3593 break
3594
3595 def _shelf_entries_from_content(self, shelf_renderer):
3596 content = shelf_renderer.get('content')
3597 if not isinstance(content, dict):
3598 return
3599 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3600 if renderer:
3601 # TODO: add support for nested playlists so each shelf is processed
3602 # as separate playlist
3603 # TODO: this includes only first N items
3604 for entry in self._grid_entries(renderer):
3605 yield entry
3606 renderer = content.get('horizontalListRenderer')
3607 if renderer:
3608 # TODO
3609 pass
3610
3611 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3612 ep = try_get(
3613 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3614 compat_str)
3615 shelf_url = urljoin('https://www.youtube.com', ep)
3616 if shelf_url:
3617 # Skipping links to another channels, note that checking for
3618 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3619 # will not work
3620 if skip_channels and '/channels?' in shelf_url:
3621 return
3622 title = self._get_text(shelf_renderer, lambda x: x['title'])
3623 yield self.url_result(shelf_url, video_title=title)
3624 # Shelf may not contain shelf URL, fallback to extraction from content
3625 for entry in self._shelf_entries_from_content(shelf_renderer):
3626 yield entry
3627
3628 def _playlist_entries(self, video_list_renderer):
3629 for content in video_list_renderer['contents']:
3630 if not isinstance(content, dict):
3631 continue
3632 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3633 if not isinstance(renderer, dict):
3634 continue
3635 video_id = renderer.get('videoId')
3636 if not video_id:
3637 continue
3638 yield self._extract_video(renderer)
3639
3640 def _rich_entries(self, rich_grid_renderer):
3641 renderer = try_get(
3642 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3643 video_id = renderer.get('videoId')
3644 if not video_id:
3645 return
3646 yield self._extract_video(renderer)
3647
3648 def _video_entry(self, video_renderer):
3649 video_id = video_renderer.get('videoId')
3650 if video_id:
3651 return self._extract_video(video_renderer)
3652
3653 def _post_thread_entries(self, post_thread_renderer):
3654 post_renderer = try_get(
3655 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3656 if not post_renderer:
3657 return
3658 # video attachment
3659 video_renderer = try_get(
3660 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3661 video_id = video_renderer.get('videoId')
3662 if video_id:
3663 entry = self._extract_video(video_renderer)
3664 if entry:
3665 yield entry
3666 # playlist attachment
3667 playlist_id = try_get(
3668 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3669 if playlist_id:
3670 yield self.url_result(
3671 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3672 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3673 # inline video links
3674 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3675 for run in runs:
3676 if not isinstance(run, dict):
3677 continue
3678 ep_url = try_get(
3679 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3680 if not ep_url:
3681 continue
3682 if not YoutubeIE.suitable(ep_url):
3683 continue
3684 ep_video_id = YoutubeIE._match_id(ep_url)
3685 if video_id == ep_video_id:
3686 continue
3687 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3688
3689 def _post_thread_continuation_entries(self, post_thread_continuation):
3690 contents = post_thread_continuation.get('contents')
3691 if not isinstance(contents, list):
3692 return
3693 for content in contents:
3694 renderer = content.get('backstagePostThreadRenderer')
3695 if not isinstance(renderer, dict):
3696 continue
3697 for entry in self._post_thread_entries(renderer):
3698 yield entry
3699
3700 r''' # unused
3701 def _rich_grid_entries(self, contents):
3702 for content in contents:
3703 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3704 if video_renderer:
3705 entry = self._video_entry(video_renderer)
3706 if entry:
3707 yield entry
3708 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield all entries of a tab, following API continuations page by page.

        `continuation_list` is a 1-element list used as a writable cell so the
        nested generator can pass the next continuation token back out.
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Not an item section; may still be a rich item (e.g. home feed)
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # Dispatch table: renderer key -> generator of entries
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                    # Fall back to the section's own continuation token
                    if not continuation_list[0]:
                        continuation_list[0] = self._extract_continuation(is_renderer)

                # Last resort: continuation attached to the parent renderer
                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        # First page comes from the webpage data itself
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        # Subsequent pages are fetched via the innertube API
        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Carry visitorData forward so YouTube keeps serving consistent pages
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # Older-style continuation responses (continuationContents)
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # Newer-style continuation responses (onResponseReceived*); the
            # tuple maps the item key to the key its handler expects
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # Re-wrap the flat item list under the key the handler expects
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3824
3825 @staticmethod
3826 def _extract_selected_tab(tabs):
3827 for tab in tabs:
3828 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3829 if renderer.get('selected') is True:
3830 return renderer
3831 else:
3832 raise ExtractorError('Unable to find selected tab')
3833
3834 @classmethod
3835 def _extract_uploader(cls, data):
3836 uploader = {}
3837 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3838 owner = try_get(
3839 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3840 if owner:
3841 uploader['uploader'] = owner.get('text')
3842 uploader['uploader_id'] = try_get(
3843 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3844 uploader['uploader_url'] = urljoin(
3845 'https://www.youtube.com/',
3846 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3847 return {k: v for k, v in uploader.items() if v is not None}
3848
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build the playlist result for a tabbed page (channel/playlist/feed),
        collecting metadata from whichever metadata renderer is present."""
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        # Channel pages carry channelMetadataRenderer; plain playlists carry
        # playlistMetadataRenderer instead
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            # Prefer the avatar thumbnails; fall back to the sidebar's
            # playlist video thumbnail
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # Hashtag pages have no metadata renderer; use the hashtag header,
            # otherwise fall back to the id itself
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        # Append tab name (e.g. " - Videos") to disambiguate the title
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # Playlist pages: uploader info comes from the sidebar instead
            metadata.update(self._extract_uploader(data))
            metadata.update({
                'channel': metadata['uploader'],
                'channel_id': metadata['uploader_id'],
                'channel_url': metadata['uploader_url']})
        ytcfg = self.extract_ytcfg(item_id, webpage)
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_identity_token(webpage, item_id),
                self._extract_account_syncid(ytcfg, data), ytcfg),
            **metadata)
3923
    def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
        """Yield the videos of an 'infinite' Mix playlist, paging through the
        `next` API until the first video comes around again (Mixes loop)."""
        first_id = last_id = None
        ytcfg = self.extract_ytcfg(playlist_id, webpage)
        headers = self.generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
        for page_num in itertools.count(1):
            videos = list(self._playlist_entries(playlist))
            if not videos:
                return
            # Pages overlap: resume right after the last video already yielded
            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video['id'] == first_id:
                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                    return
                yield video
            first_id = first_id or videos[0]['id']
            last_id = videos[-1]['id']
            # Continuation data for the next page comes from the last panel video
            # NOTE(review): watch_endpoint can be None if the renderer path is
            # missing, which would make the .get() calls below raise — confirm
            watch_endpoint = try_get(
                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
            query = {
                'playlistId': playlist_id,
                'videoId': watch_endpoint.get('videoId') or last_id,
                'index': watch_endpoint.get('index') or len(videos),
                'params': watch_endpoint.get('params') or 'OAE%3D'
            }
            response = self._extract_response(
                item_id='%s page %d' % (playlist_id, page_num),
                query=query, ep='next', headers=headers, ytcfg=ytcfg,
                check_get_keys='contents'
            )
            playlist = try_get(
                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3959
3960 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
3961 title = playlist.get('title') or try_get(
3962 data, lambda x: x['titleText']['simpleText'], compat_str)
3963 playlist_id = playlist.get('playlistId') or item_id
3964
3965 # Delegating everything except mix playlists to regular tab-based playlist URL
3966 playlist_url = urljoin(url, try_get(
3967 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3968 compat_str))
3969 if playlist_url and playlist_url != url:
3970 return self.url_result(
3971 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3972 video_title=title)
3973
3974 return self.playlist_result(
3975 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
3976 playlist_id=playlist_id, playlist_title=title)
3977
3978 def _extract_availability(self, data):
3979 """
3980 Gets the availability of a given playlist/tab.
3981 Note: Unless YouTube tells us explicitly, we do not assume it is public
3982 @param data: response
3983 """
3984 is_private = is_unlisted = None
3985 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
3986 badge_labels = self._extract_badges(renderer)
3987
3988 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
3989 privacy_dropdown_entries = try_get(
3990 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
3991 for renderer_dict in privacy_dropdown_entries:
3992 is_selected = try_get(
3993 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
3994 if not is_selected:
3995 continue
3996 label = self._get_text(
3997 try_get(renderer_dict, lambda x: x['privacyDropdownItemRenderer']['label'], dict) or [])
3998 if label:
3999 badge_labels.add(label.lower())
4000 break
4001
4002 for badge_label in badge_labels:
4003 if badge_label == 'unlisted':
4004 is_unlisted = True
4005 elif badge_label == 'private':
4006 is_private = True
4007 elif badge_label == 'public':
4008 is_unlisted = is_private = False
4009 return self._availability(is_private, False, False, False, is_unlisted)
4010
4011 @staticmethod
4012 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4013 sidebar_renderer = try_get(
4014 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4015 for item in sidebar_renderer:
4016 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4017 if renderer:
4018 return renderer
4019
    def _reload_with_unavailable_videos(self, item_id, data, webpage):
        """
        Get playlist with unavailable videos if the 'show unavailable videos' button exists.
        """
        browse_id = params = None
        renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
        if not renderer:
            return
        # Look for the "Show unavailable videos" entry in the sidebar menu
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

        ytcfg = self.extract_ytcfg(item_id, webpage)
        headers = self.generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            identity_token=self._extract_identity_token(webpage, item_id=item_id),
            visitor_data=try_get(
                self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        # Fall back to defaults when the button did not yield endpoint data;
        # 'VL' + playlist id is the standard browseId for playlists
        query = {
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        }
        return self._extract_response(
            item_id=item_id, headers=headers, query=query,
            check_get_keys='contents', fatal=False, ytcfg=ytcfg,
            note='Downloading API JSON with unavailable videos')
4058
4059 def _extract_webpage(self, url, item_id):
4060 retries = self.get_param('extractor_retries', 3)
4061 count = -1
4062 last_error = 'Incomplete yt initial data recieved'
4063 while count < retries:
4064 count += 1
4065 # Sometimes youtube returns a webpage with incomplete ytInitialData
4066 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4067 if count:
4068 self.report_warning('%s. Retrying ...' % last_error)
4069 webpage = self._download_webpage(
4070 url, item_id,
4071 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4072 data = self.extract_yt_initial_data(item_id, webpage)
4073 if data.get('contents') or data.get('currentVideoEndpoint'):
4074 break
4075 # Extract alerts here only when there is error
4076 self._extract_and_report_alerts(data)
4077 if count >= retries:
4078 raise ExtractorError(last_error)
4079 return webpage, data
4080
4081 @staticmethod
4082 def _smuggle_data(entries, data):
4083 for entry in entries:
4084 if data:
4085 entry['url'] = smuggle_url(entry['url'], data)
4086 yield entry
4087
4088 def _real_extract(self, url):
4089 url, smuggled_data = unsmuggle_url(url, {})
4090 if self.is_music_url(url):
4091 smuggled_data['is_music_url'] = True
4092 info_dict = self.__real_extract(url, smuggled_data)
4093 if info_dict.get('entries'):
4094 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4095 return info_dict
4096
4097 _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4098
4099 def __real_extract(self, url, smuggled_data):
4100 item_id = self._match_id(url)
4101 url = compat_urlparse.urlunparse(
4102 compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
4103 compat_opts = self.get_param('compat_opts', [])
4104
4105 def get_mobj(url):
4106 mobj = self._url_re.match(url).groupdict()
4107 mobj.update((k, '') for k, v in mobj.items() if v is None)
4108 return mobj
4109
4110 mobj = get_mobj(url)
4111 # Youtube returns incomplete data if tabname is not lower case
4112 pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
4113
4114 if is_channel:
4115 if smuggled_data.get('is_music_url'):
4116 if item_id[:2] == 'VL':
4117 # Youtube music VL channels have an equivalent playlist
4118 item_id = item_id[2:]
4119 pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
4120 elif item_id[:2] == 'MP':
4121 # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
4122 item_id = self._search_regex(
4123 r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
4124 self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
4125 'playlist id')
4126 pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
4127 elif mobj['channel_type'] == 'browse':
4128 # Youtube music /browse/ should be changed to /channel/
4129 pre = 'https://www.youtube.com/channel/%s' % item_id
4130 if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
4131 # Home URLs should redirect to /videos/
4132 self.report_warning(
4133 'A channel/user page was given. All the channel\'s videos will be downloaded. '
4134 'To download only the videos in the home page, add a "/featured" to the URL')
4135 tab = '/videos'
4136
4137 url = ''.join((pre, tab, post))
4138 mobj = get_mobj(url)
4139
4140 # Handle both video/playlist URLs
4141 qs = parse_qs(url)
4142 video_id = qs.get('v', [None])[0]
4143 playlist_id = qs.get('list', [None])[0]
4144
4145 if not video_id and mobj['not_channel'].startswith('watch'):
4146 if not playlist_id:
4147 # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
4148 raise ExtractorError('Unable to recognize tab page')
4149 # Common mistake: https://www.youtube.com/watch?list=playlist_id
4150 self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
4151 url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
4152 mobj = get_mobj(url)
4153
4154 if video_id and playlist_id:
4155 if self.get_param('noplaylist'):
4156 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
4157 return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
4158 self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))
4159
4160 webpage, data = self._extract_webpage(url, item_id)
4161
4162 tabs = try_get(
4163 data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4164 if tabs:
4165 selected_tab = self._extract_selected_tab(tabs)
4166 tab_name = selected_tab.get('title', '')
4167 if 'no-youtube-channel-redirect' not in compat_opts:
4168 if mobj['tab'] == '/live':
4169 # Live tab should have redirected to the video
4170 raise ExtractorError('The channel is not currently live', expected=True)
4171 if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
4172 if not mobj['not_channel'] and item_id[:2] == 'UC':
4173 # Topic channels don't have /videos. Use the equivalent playlist instead
4174 self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
4175 pl_id = 'UU%s' % item_id[2:]
4176 pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
4177 try:
4178 pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
4179 for alert_type, alert_message in self._extract_alerts(pl_data):
4180 if alert_type == 'error':
4181 raise ExtractorError('Youtube said: %s' % alert_message)
4182 item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
4183 except ExtractorError:
4184 self.report_warning('The playlist gave error. Falling back to channel URL')
4185 else:
4186 self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))
4187
4188 self.write_debug('Final URL: %s' % url)
4189
4190 # YouTube sometimes provides a button to reload playlist with unavailable videos.
4191 if 'no-youtube-unavailable-videos' not in compat_opts:
4192 data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
4193 self._extract_and_report_alerts(data)
4194 tabs = try_get(
4195 data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4196 if tabs:
4197 return self._extract_from_tabs(item_id, webpage, data, tabs)
4198
4199 playlist = try_get(
4200 data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4201 if playlist:
4202 return self._extract_from_playlist(item_id, url, data, playlist, webpage)
4203
4204 video_id = try_get(
4205 data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
4206 compat_str) or video_id
4207 if video_id:
4208 if mobj['tab'] != '/live': # live tab is expected to redirect to video
4209 self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
4210 return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
4211
4212 raise ExtractorError('Unable to recognize tab page')
4213
4214
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Anything the tab extractor claims is not handled here
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        # URLs carrying a video id are watch pages, not bare playlists
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # Record music-ness of the original URL before it is rewritten below
        smuggle_music = YoutubeBaseInfoExtractor.is_music_url(url)
        canonical_url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if smuggle_music:
            canonical_url = smuggle_url(canonical_url, {'is_music_url': True})
        return self.url_result(
            canonical_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4299
4300
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        match = re.match(self._VALID_URL, url)
        video_id, playlist_id = match.group('id'), match.group('playlist_id')
        # Hand off to the generic tab extractor via the equivalent /watch URL
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4339
4340
class YoutubeYtUserIE(InfoExtractor):
    """Extractor for the "ytuser:<name>" shorthand."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        # Resolve the shorthand to the canonical /user/ page
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4354
4355
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos are exposed as the special "LL" playlist
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4373
4374
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Extractor for "ytsearchN:<query>" style search pseudo-URLs."""
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra search request parameters (e.g. a sort-order filter); None for default
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* extracted video results for *query*, paging the API."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            # Fold the continuation token (if any) into the request payload
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # The first page and continuation pages nest the result list differently,
            # hence the two alternative lookup paths
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    # Skip anything that is not a plain video renderer entry
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token found anywhere on this page: no more results
            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
4442
4443
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor that returns the newest uploads first."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Encoded search filter selecting "sort by upload date"
    _SEARCH_PARAMS = 'CAI%3D'
4449
4450
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        return cls._VALID_URL

    def _real_extract(self, url):
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        # Either of the two known parameter names may carry the search terms
        query = (params.get('search_query') or params.get('q'))[0]
        # 'sp' carries encoded search filters; default to no filters
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4477
4478
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common machinery for the authenticated feed extractors.

    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # Every feed lives at a fixed /feed/<name> URL handled by the tab extractor
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4495
4496
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The watch-later list is exposed as the special "WL" playlist
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4509
4510
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extractor for the recommended-videos feed (the YouTube home page)."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Recommendations are also served to anonymous users
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4526
4527
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's subscriptions feed."""
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4539
4540
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's watch-history feed."""
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4549
4550
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Reaching this extractor means the URL matched a /watch page with no
        # video id — almost always an unquoted '&' eaten by the user's shell
        message = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(message, expected=True)
4598
4599
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # A valid YouTube id is 11 characters; anything shorter was cut off
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)