# Scrape residue from the jfr.im gitweb mirror of yt-dlp.git
# (blob view of yt_dlp/extractor/youtube.py, commit "[cleanup] Misc")
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bytes_to_intlist,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 error_to_compat_str,
36 ExtractorError,
37 float_or_none,
38 format_field,
39 int_or_none,
40 intlist_to_bytes,
41 is_html,
42 mimetype2ext,
43 network_exceptions,
44 orderedSet,
45 parse_codecs,
46 parse_count,
47 parse_duration,
48 parse_iso8601,
49 parse_qs,
50 qualities,
51 remove_end,
52 remove_start,
53 smuggle_url,
54 str_or_none,
55 str_to_int,
56 traverse_obj,
57 try_get,
58 unescapeHTML,
59 unified_strdate,
60 unsmuggle_url,
61 update_url_query,
62 url_or_none,
63 urljoin,
64 variadic,
65 )
66
67
# any clients starting with _ cannot be explicity requested by the user
# Known innertube API clients, keyed by the name a user may request.
# Entries missing INNERTUBE_API_KEY / INNERTUBE_HOST are filled in with
# defaults by build_innertube_clients() below.
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        # Numeric id sent in the X-YouTube-Client-Name header
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
    },
    'android_creator': {
        # No API key here: the shared default is applied in build_innertube_clients()
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
206
207
def build_innertube_clients():
    """Normalise INNERTUBE_CLIENTS in place.

    Fills in the shared API-key/host defaults, assigns a selection priority
    per client family, and derives a ``<client>_agegate`` variant for every
    base client (used for age-restricted videos).
    """
    embed_source = {
        'embedUrl': 'https://google.com',  # any syntactically valid URL works
    }
    core_clients = ('android', 'web', 'ios', 'mweb')
    base_rank = qualities(core_clients[::-1])

    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        # Shared defaults, only applied where the table above omits them
        cfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        cfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        cfg['priority'] = 10 * base_rank(name.split('_', 1)[0])

        if name in core_clients:
            # Derive an EMBED-screen clone used to bypass age-gating
            agegate_cfg = copy.deepcopy(cfg)
            agegate_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_source
            agegate_cfg['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_cfg
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_source
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3


build_innertube_clients()
234
235
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # URL path components that can never be a channel/user name
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Prefixed playlist ids plus the special lists (WL, LL, LM, RDMM)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    r''' # Unused since login is broken
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
    '''
260
261 def _login(self):
262 """
263 Attempt to log in to YouTube.
264 True is returned if successful or skipped.
265 False is returned if login failed.
266
267 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
268 """
269
270 def warn(message):
271 self.report_warning(message)
272
273 # username+password login is broken
274 if (self._LOGIN_REQUIRED
275 and self.get_param('cookiefile') is None
276 and self.get_param('cookiesfrombrowser') is None):
277 self.raise_login_required(
278 'Login details are needed to download this content', method='cookies')
279 username, password = self._get_login_info()
280 if username:
281 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
282 return
283
284 # Everything below this is broken!
285 r'''
286 # No authentication to be performed
287 if username is None:
288 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
289 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
290 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
291 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
292 return True
293
294 login_page = self._download_webpage(
295 self._LOGIN_URL, None,
296 note='Downloading login page',
297 errnote='unable to fetch login page', fatal=False)
298 if login_page is False:
299 return
300
301 login_form = self._hidden_inputs(login_page)
302
303 def req(url, f_req, note, errnote):
304 data = login_form.copy()
305 data.update({
306 'pstMsg': 1,
307 'checkConnection': 'youtube',
308 'checkedDomains': 'youtube',
309 'hl': 'en',
310 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
311 'f.req': json.dumps(f_req),
312 'flowName': 'GlifWebSignIn',
313 'flowEntry': 'ServiceLogin',
314 # TODO: reverse actual botguard identifier generation algo
315 'bgRequest': '["identifier",""]',
316 })
317 return self._download_json(
318 url, None, note=note, errnote=errnote,
319 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
320 fatal=False,
321 data=urlencode_postdata(data), headers={
322 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
323 'Google-Accounts-XSRF': 1,
324 })
325
326 lookup_req = [
327 username,
328 None, [], None, 'US', None, None, 2, False, True,
329 [
330 None, None,
331 [2, 1, None, 1,
332 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
333 None, [], 4],
334 1, [None, None, []], None, None, None, True
335 ],
336 username,
337 ]
338
339 lookup_results = req(
340 self._LOOKUP_URL, lookup_req,
341 'Looking up account info', 'Unable to look up account info')
342
343 if lookup_results is False:
344 return False
345
346 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
347 if not user_hash:
348 warn('Unable to extract user hash')
349 return False
350
351 challenge_req = [
352 user_hash,
353 None, 1, None, [1, None, None, None, [password, None, True]],
354 [
355 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
356 1, [None, None, []], None, None, None, True
357 ]]
358
359 challenge_results = req(
360 self._CHALLENGE_URL, challenge_req,
361 'Logging in', 'Unable to log in')
362
363 if challenge_results is False:
364 return
365
366 login_res = try_get(challenge_results, lambda x: x[0][5], list)
367 if login_res:
368 login_msg = try_get(login_res, lambda x: x[5], compat_str)
369 warn(
370 'Unable to login: %s' % 'Invalid password'
371 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
372 return False
373
374 res = try_get(challenge_results, lambda x: x[0][-1], list)
375 if not res:
376 warn('Unable to extract result entry')
377 return False
378
379 login_challenge = try_get(res, lambda x: x[0][0], list)
380 if login_challenge:
381 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
382 if challenge_str == 'TWO_STEP_VERIFICATION':
383 # SEND_SUCCESS - TFA code has been successfully sent to phone
384 # QUOTA_EXCEEDED - reached the limit of TFA codes
385 status = try_get(login_challenge, lambda x: x[5], compat_str)
386 if status == 'QUOTA_EXCEEDED':
387 warn('Exceeded the limit of TFA codes, try later')
388 return False
389
390 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
391 if not tl:
392 warn('Unable to extract TL')
393 return False
394
395 tfa_code = self._get_tfa_info('2-step verification code')
396
397 if not tfa_code:
398 warn(
399 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
400 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
401 return False
402
403 tfa_code = remove_start(tfa_code, 'G-')
404
405 tfa_req = [
406 user_hash, None, 2, None,
407 [
408 9, None, None, None, None, None, None, None,
409 [None, tfa_code, True, 2]
410 ]]
411
412 tfa_results = req(
413 self._TFA_URL.format(tl), tfa_req,
414 'Submitting TFA code', 'Unable to submit TFA code')
415
416 if tfa_results is False:
417 return False
418
419 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
420 if tfa_res:
421 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
422 warn(
423 'Unable to finish TFA: %s' % 'Invalid TFA code'
424 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
425 return False
426
427 check_cookie_url = try_get(
428 tfa_results, lambda x: x[0][-1][2], compat_str)
429 else:
430 CHALLENGES = {
431 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
432 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
433 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
434 }
435 challenge = CHALLENGES.get(
436 challenge_str,
437 '%s returned error %s.' % (self.IE_NAME, challenge_str))
438 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
439 return False
440 else:
441 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
442
443 if not check_cookie_url:
444 warn('Unable to extract CheckCookie URL')
445 return False
446
447 check_cookie_results = self._download_webpage(
448 check_cookie_url, None, 'Checking cookie', fatal=False)
449
450 if check_cookie_results is False:
451 return False
452
453 if 'https://myaccount.google.com/' not in check_cookie_results:
454 warn('Unable to log in')
455 return False
456
457 return True
458 '''
459
460 def _initialize_consent(self):
461 cookies = self._get_cookies('https://www.youtube.com/')
462 if cookies.get('__Secure-3PSID'):
463 return
464 consent_id = None
465 consent = cookies.get('CONSENT')
466 if consent:
467 if 'YES' in consent.value:
468 return
469 consent_id = self._search_regex(
470 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
471 if not consent_id:
472 consent_id = random.randint(100, 999)
473 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
474
475 def _real_initialize(self):
476 self._initialize_consent()
477 if self._downloader is None:
478 return
479 if not self._login():
480 return
481
    # Matches the JSON blob assigned to ytInitialData in watch/browse pages
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Matches the JSON blob assigned to ytInitialPlayerResponse
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Delimits the end of the above JSON objects within the page source
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
485
486 def _get_default_ytcfg(self, client='web'):
487 return copy.deepcopy(INNERTUBE_CLIENTS[client])
488
489 def _get_innertube_host(self, client='web'):
490 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
491
492 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
493 # try_get but with fallback to default ytcfg client values when present
494 _func = lambda y: try_get(y, getter, expected_type)
495 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
496
497 def _extract_client_name(self, ytcfg, default_client='web'):
498 return self._ytcfg_get_safe(
499 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
500 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
501
502 @staticmethod
503 def _extract_session_index(*data):
504 for ytcfg in data:
505 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
506 if session_index is not None:
507 return session_index
508
509 def _extract_client_version(self, ytcfg, default_client='web'):
510 return self._ytcfg_get_safe(
511 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
512 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
513
514 def _extract_api_key(self, ytcfg=None, default_client='web'):
515 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
516
517 def _extract_context(self, ytcfg=None, default_client='web'):
518 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
519 context = _get_context(ytcfg)
520 if context:
521 return context
522
523 context = _get_context(self._get_default_ytcfg(default_client))
524 if not ytcfg:
525 return context
526
527 # Recreate the client context (required)
528 context['client'].update({
529 'clientVersion': self._extract_client_version(ytcfg, default_client),
530 'clientName': self._extract_client_name(ytcfg, default_client),
531 })
532 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
533 if visitor_data:
534 context['client']['visitorData'] = visitor_data
535 return context
536
    # Cached SAPISID cookie value; False once we have determined it is absent
    _SAPISID = None

    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """Build a 'SAPISIDHASH <ts>_<sha1>' Authorization header value from
        the SAPISID/__Secure-3PAPISID cookie, or None when unavailable."""
        time_now = round(time.time())
        if self._SAPISID is None:
            yt_cookies = self._get_cookies('https://www.youtube.com')
            # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
            # See: https://github.com/yt-dlp/yt-dlp/issues/393
            sapisid_cookie = dict_get(
                yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
            if sapisid_cookie and sapisid_cookie.value:
                self._SAPISID = sapisid_cookie.value
                self.write_debug('Extracted SAPISID cookie')
                # SAPISID cookie is required if not already present
                if not yt_cookies.get('SAPISID'):
                    self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                    self._set_cookie(
                        '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
            else:
                # Remember the absence so we don't rescan cookies every call
                self._SAPISID = False
        if not self._SAPISID:
            return None
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'
563
564 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
565 note='Downloading API JSON', errnote='Unable to download API page',
566 context=None, api_key=None, api_hostname=None, default_client='web'):
567
568 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
569 data.update(query)
570 real_headers = self.generate_api_headers(default_client=default_client)
571 real_headers.update({'content-type': 'application/json'})
572 if headers:
573 real_headers.update(headers)
574 return self._download_json(
575 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
576 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
577 data=json.dumps(data).encode('utf8'), headers=real_headers,
578 query={'key': api_key or self._extract_api_key()})
579
580 def extract_yt_initial_data(self, video_id, webpage):
581 return self._parse_json(
582 self._search_regex(
583 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
584 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
585 video_id)
586
587 def _extract_identity_token(self, webpage, item_id):
588 if not webpage:
589 return None
590 ytcfg = self.extract_ytcfg(item_id, webpage)
591 if ytcfg:
592 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
593 if token:
594 return token
595 return self._search_regex(
596 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
597 'identity token', default=None)
598
599 @staticmethod
600 def _extract_account_syncid(*args):
601 """
602 Extract syncId required to download private playlists of secondary channels
603 @params response and/or ytcfg
604 """
605 for data in args:
606 # ytcfg includes channel_syncid if on secondary channel
607 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
608 if delegated_sid:
609 return delegated_sid
610 sync_ids = (try_get(
611 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
612 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
613 if len(sync_ids) >= 2 and sync_ids[1]:
614 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
615 # and just "user_syncid||" for primary channel. We only want the channel_syncid
616 return sync_ids[0]
617
618 def extract_ytcfg(self, video_id, webpage):
619 if not webpage:
620 return {}
621 return self._parse_json(
622 self._search_regex(
623 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
624 default='{}'), video_id, fatal=False) or {}
625
    def generate_api_headers(
            self, ytcfg=None, identity_token=None, account_syncid=None,
            visitor_data=None, api_hostname=None, default_client='web', session_index=None):
        """Build the HTTP headers for an innertube API request.

        Client name/version come from *ytcfg* with fallback to the built-in
        defaults for *default_client*; the auth-related headers are only added
        when the corresponding token/cookie data is available.
        """
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
            'Origin': origin
        }
        # Recover visitorData from the ytcfg context when not given explicitly
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
        if session_index is None and ytcfg:
            session_index = self._extract_session_index(ytcfg)
        if account_syncid or session_index is not None:
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        # SAPISIDHASH auth is only possible when the SAPISID cookie exists
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers
654
655 @staticmethod
656 def _build_api_continuation_query(continuation, ctp=None):
657 query = {
658 'continuation': continuation
659 }
660 # TODO: Inconsistency with clickTrackingParams.
661 # Currently we have a fixed ctp contained within context (from ytcfg)
662 # and a ctp in root query for continuation.
663 if ctp:
664 query['clickTracking'] = {'clickTrackingParams': ctp}
665 return query
666
667 @classmethod
668 def _extract_next_continuation_data(cls, renderer):
669 next_continuation = try_get(
670 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
671 lambda x: x['continuation']['reloadContinuationData']), dict)
672 if not next_continuation:
673 return
674 continuation = next_continuation.get('continuation')
675 if not continuation:
676 return
677 ctp = next_continuation.get('clickTrackingParams')
678 return cls._build_api_continuation_query(continuation, ctp)
679
680 @classmethod
681 def _extract_continuation_ep_data(cls, continuation_ep: dict):
682 if isinstance(continuation_ep, dict):
683 continuation = try_get(
684 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
685 if not continuation:
686 return
687 ctp = continuation_ep.get('clickTrackingParams')
688 return cls._build_api_continuation_query(continuation, ctp)
689
690 @classmethod
691 def _extract_continuation(cls, renderer):
692 next_continuation = cls._extract_next_continuation_data(renderer)
693 if next_continuation:
694 return next_continuation
695
696 contents = []
697 for key in ('contents', 'items'):
698 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
699
700 for content in contents:
701 if not isinstance(content, dict):
702 continue
703 continuation_ep = try_get(
704 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
705 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
706 dict)
707 continuation = cls._extract_continuation_ep_data(continuation_ep)
708 if continuation:
709 return continuation
710
711 @classmethod
712 def _extract_alerts(cls, data):
713 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
714 if not isinstance(alert_dict, dict):
715 continue
716 for alert in alert_dict.values():
717 alert_type = alert.get('type')
718 if not alert_type:
719 continue
720 message = cls._get_text(alert, 'text')
721 if message:
722 yield alert_type, message
723
724 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
725 errors = []
726 warnings = []
727 for alert_type, alert_message in alerts:
728 if alert_type.lower() == 'error' and fatal:
729 errors.append([alert_type, alert_message])
730 else:
731 warnings.append([alert_type, alert_message])
732
733 for alert_type, alert_message in (warnings + errors[:-1]):
734 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
735 if errors:
736 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
737
738 def _extract_and_report_alerts(self, data, *args, **kwargs):
739 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
740
741 def _extract_badges(self, renderer: dict):
742 badges = set()
743 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
744 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
745 if label:
746 badges.add(label.lower())
747 return badges
748
    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """Extract display text from a renderer object.

        Each entry of *path_list* is tried in turn (the whole of *data* when
        no paths are given). For every candidate object, a 'simpleText' value
        wins; otherwise the 'text' values of its 'runs' are concatenated,
        optionally truncated to the first *max_runs* runs. Returns None when
        nothing yields text.
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # A non-branching path returns a single object; wrap it so the
                # loop below can treat both cases uniformly
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                if not runs and isinstance(item, list):
                    # The item itself may already be a list of runs
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text
770
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """Call the innertube API endpoint *ep* with retries.

        Retries (up to the 'extractor_retries' param, default 3) on retryable
        network errors, on 'unknown error' alerts inside a 200 response, and
        when none of *check_get_keys* is present in the response (incomplete
        data). Returns the parsed response dict, or None when non-fatal and
        all attempts failed.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # Try to surface the API's own error message from a
                    # non-HTML error body
                    if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                        e.cause.seek(0)
                        yt_error = try_get(
                            self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e.msg)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False, only_once=True)
                except ExtractorError as e:
                    # YouTube servers may return errors we want to retry on in a 200 OK response
                    # See: https://github.com/yt-dlp/yt-dlp/issues/839
                    if 'unknown error' in e.msg.lower():
                        last_error = e.msg
                        continue
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
841
842 @staticmethod
843 def is_music_url(url):
844 return re.match(r'https?://music\.youtube\.com/', url) is not None
845
846 def _extract_video(self, renderer):
847 video_id = renderer.get('videoId')
848 title = self._get_text(renderer, 'title')
849 description = self._get_text(renderer, 'descriptionSnippet')
850 duration = parse_duration(self._get_text(
851 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
852 view_count_text = self._get_text(renderer, 'viewCountText') or ''
853 view_count = str_to_int(self._search_regex(
854 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
855 'view count', default=None))
856
857 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
858
859 return {
860 '_type': 'url',
861 'ie_key': YoutubeIE.ie_key(),
862 'id': video_id,
863 'url': video_id,
864 'title': title,
865 'description': description,
866 'duration': duration,
867 'view_count': view_count,
868 'uploader': uploader,
869 }
870
871
872 class YoutubeIE(YoutubeBaseInfoExtractor):
873 IE_DESC = 'YouTube.com'
    # Hostname regex fragments for Invidious (and similar alternative-frontend)
    # mirrors.  These are '|'-joined and interpolated into _VALID_URL below, so
    # every entry must be a bare-hostname pattern (no scheme, no path).
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        # Tor onion services and I2P addresses of the above
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    )
    # Verbose ((?x)) regex matching every supported video URL shape: full
    # youtube.com watch/embed/shorts URLs (host matched case-insensitively),
    # youtu.be short links, the Invidious mirrors above, a few proxy sites,
    # or a naked 11-character video id.  Group 1 tracks whether a host part
    # was present; the video id is captured as the named group 'id'.
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                            (?:www\.)?deturl\.com/www\.youtube\.com|
                            (?:www\.)?pwnyoutube\.com|
                            (?:www\.)?hooktube\.com|
                            (?:www\.)?yourepeat\.com|
                            tube\.majestyc\.net|
                            %(invidious)s|
                            youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e|shorts)/(?!videoseries))         # v/ or embed/ or e/ or shorts/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                                 v=
                             )
                         ))
                         |(?:
                            youtu\.be|                                        # just youtu.be/xxxx
                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                            %(invidious)s
                         )/
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     (?:\#|$)""" % {
        'invidious': '|'.join(_INVIDIOUS_SITES),
    }
    # Candidate regexes for extracting the player version id (named group 'id')
    # from the URL of the player JavaScript (e.g. .../player_ias.vflset/base.js).
    # NOTE(review): the consuming code is outside this chunk; presumably the
    # patterns are tried in this order until one matches.
    _PLAYER_INFO_RE = (
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
    )
    # Hard-coded per-itag format attributes (container, resolution, codecs,
    # audio bitrate).  Keys are itag strings as YouTube reports them; '_rtmp'
    # is a special key for RTMP streams.  Negative 'preference' values down-rank
    # the 3D and HLS variants relative to the regular formats.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
        '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
        '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
        '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
        '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
        '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
        '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
        '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    }
    # Subtitle serialization formats this extractor can request/handle.
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Opt out of the base InfoExtractor's generic geo-restriction bypass
    # (NOTE(review): flag is consumed in the base class, outside this view).
    _GEO_BYPASS = False

    # Short extractor name (used e.g. in logging and extractor selection)
    IE_NAME = 'youtube'
1091 _TESTS = [
1092 {
1093 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1094 'info_dict': {
1095 'id': 'BaW_jenozKc',
1096 'ext': 'mp4',
1097 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1098 'uploader': 'Philipp Hagemeister',
1099 'uploader_id': 'phihag',
1100 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1101 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1102 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1103 'upload_date': '20121002',
1104 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1105 'categories': ['Science & Technology'],
1106 'tags': ['youtube-dl'],
1107 'duration': 10,
1108 'view_count': int,
1109 'like_count': int,
1110 'dislike_count': int,
1111 'start_time': 1,
1112 'end_time': 9,
1113 }
1114 },
1115 {
1116 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1117 'note': 'Embed-only video (#1746)',
1118 'info_dict': {
1119 'id': 'yZIXLfi8CZQ',
1120 'ext': 'mp4',
1121 'upload_date': '20120608',
1122 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1123 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1124 'uploader': 'SET India',
1125 'uploader_id': 'setindia',
1126 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1127 'age_limit': 18,
1128 },
1129 'skip': 'Private video',
1130 },
1131 {
1132 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1133 'note': 'Use the first video ID in the URL',
1134 'info_dict': {
1135 'id': 'BaW_jenozKc',
1136 'ext': 'mp4',
1137 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1138 'uploader': 'Philipp Hagemeister',
1139 'uploader_id': 'phihag',
1140 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1141 'upload_date': '20121002',
1142 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1143 'categories': ['Science & Technology'],
1144 'tags': ['youtube-dl'],
1145 'duration': 10,
1146 'view_count': int,
1147 'like_count': int,
1148 'dislike_count': int,
1149 },
1150 'params': {
1151 'skip_download': True,
1152 },
1153 },
1154 {
1155 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1156 'note': '256k DASH audio (format 141) via DASH manifest',
1157 'info_dict': {
1158 'id': 'a9LDPn-MO4I',
1159 'ext': 'm4a',
1160 'upload_date': '20121002',
1161 'uploader_id': '8KVIDEO',
1162 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1163 'description': '',
1164 'uploader': '8KVIDEO',
1165 'title': 'UHDTV TEST 8K VIDEO.mp4'
1166 },
1167 'params': {
1168 'youtube_include_dash_manifest': True,
1169 'format': '141',
1170 },
1171 'skip': 'format 141 not served anymore',
1172 },
1173 # DASH manifest with encrypted signature
1174 {
1175 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1176 'info_dict': {
1177 'id': 'IB3lcPjvWLA',
1178 'ext': 'm4a',
1179 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1180 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1181 'duration': 244,
1182 'uploader': 'AfrojackVEVO',
1183 'uploader_id': 'AfrojackVEVO',
1184 'upload_date': '20131011',
1185 'abr': 129.495,
1186 },
1187 'params': {
1188 'youtube_include_dash_manifest': True,
1189 'format': '141/bestaudio[ext=m4a]',
1190 },
1191 },
1192 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1193 {
1194 'note': 'Embed allowed age-gate video',
1195 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1196 'info_dict': {
1197 'id': 'HtVdAasjOgU',
1198 'ext': 'mp4',
1199 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1200 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1201 'duration': 142,
1202 'uploader': 'The Witcher',
1203 'uploader_id': 'WitcherGame',
1204 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1205 'upload_date': '20140605',
1206 'age_limit': 18,
1207 },
1208 },
1209 {
1210 'note': 'Age-gate video with embed allowed in public site',
1211 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1212 'info_dict': {
1213 'id': 'HsUATh_Nc2U',
1214 'ext': 'mp4',
1215 'title': 'Godzilla 2 (Official Video)',
1216 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1217 'upload_date': '20200408',
1218 'uploader_id': 'FlyingKitty900',
1219 'uploader': 'FlyingKitty',
1220 'age_limit': 18,
1221 },
1222 },
1223 {
1224 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1225 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1226 'info_dict': {
1227 'id': 'Tq92D6wQ1mg',
1228 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1229 'ext': 'mp4',
1230 'upload_date': '20191227',
1231 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1232 'uploader': 'Projekt Melody',
1233 'description': 'md5:17eccca93a786d51bc67646756894066',
1234 'age_limit': 18,
1235 },
1236 },
1237 {
1238 'note': 'Non-Agegated non-embeddable video',
1239 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1240 'info_dict': {
1241 'id': 'MeJVWBSsPAY',
1242 'ext': 'mp4',
1243 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1244 'uploader': 'Herr Lurik',
1245 'uploader_id': 'st3in234',
1246 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1247 'upload_date': '20130730',
1248 },
1249 },
1250 {
1251 'note': 'Non-bypassable age-gated video',
1252 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1253 'only_matching': True,
1254 },
1255 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1256 # YouTube Red ad is not captured for creator
1257 {
1258 'url': '__2ABJjxzNo',
1259 'info_dict': {
1260 'id': '__2ABJjxzNo',
1261 'ext': 'mp4',
1262 'duration': 266,
1263 'upload_date': '20100430',
1264 'uploader_id': 'deadmau5',
1265 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1266 'creator': 'deadmau5',
1267 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1268 'uploader': 'deadmau5',
1269 'title': 'Deadmau5 - Some Chords (HD)',
1270 'alt_title': 'Some Chords',
1271 },
1272 'expected_warnings': [
1273 'DASH manifest missing',
1274 ]
1275 },
1276 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1277 {
1278 'url': 'lqQg6PlCWgI',
1279 'info_dict': {
1280 'id': 'lqQg6PlCWgI',
1281 'ext': 'mp4',
1282 'duration': 6085,
1283 'upload_date': '20150827',
1284 'uploader_id': 'olympic',
1285 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1286 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1287 'uploader': 'Olympics',
1288 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1289 },
1290 'params': {
1291 'skip_download': 'requires avconv',
1292 }
1293 },
1294 # Non-square pixels
1295 {
1296 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1297 'info_dict': {
1298 'id': '_b-2C3KPAM0',
1299 'ext': 'mp4',
1300 'stretched_ratio': 16 / 9.,
1301 'duration': 85,
1302 'upload_date': '20110310',
1303 'uploader_id': 'AllenMeow',
1304 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1305 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1306 'uploader': '孫ᄋᄅ',
1307 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1308 },
1309 },
1310 # url_encoded_fmt_stream_map is empty string
1311 {
1312 'url': 'qEJwOuvDf7I',
1313 'info_dict': {
1314 'id': 'qEJwOuvDf7I',
1315 'ext': 'webm',
1316 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1317 'description': '',
1318 'upload_date': '20150404',
1319 'uploader_id': 'spbelect',
1320 'uploader': 'Наблюдатели Петербурга',
1321 },
1322 'params': {
1323 'skip_download': 'requires avconv',
1324 },
1325 'skip': 'This live event has ended.',
1326 },
1327 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1328 {
1329 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1330 'info_dict': {
1331 'id': 'FIl7x6_3R5Y',
1332 'ext': 'webm',
1333 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1334 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1335 'duration': 220,
1336 'upload_date': '20150625',
1337 'uploader_id': 'dorappi2000',
1338 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1339 'uploader': 'dorappi2000',
1340 'formats': 'mincount:31',
1341 },
1342 'skip': 'not actual anymore',
1343 },
1344 # DASH manifest with segment_list
1345 {
1346 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1347 'md5': '8ce563a1d667b599d21064e982ab9e31',
1348 'info_dict': {
1349 'id': 'CsmdDsKjzN8',
1350 'ext': 'mp4',
1351 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1352 'uploader': 'Airtek',
1353 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1354 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1355 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1356 },
1357 'params': {
1358 'youtube_include_dash_manifest': True,
1359 'format': '135', # bestvideo
1360 },
1361 'skip': 'This live event has ended.',
1362 },
1363 {
1364 # Multifeed videos (multiple cameras), URL is for Main Camera
1365 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1366 'info_dict': {
1367 'id': 'jvGDaLqkpTg',
1368 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1369 'description': 'md5:e03b909557865076822aa169218d6a5d',
1370 },
1371 'playlist': [{
1372 'info_dict': {
1373 'id': 'jvGDaLqkpTg',
1374 'ext': 'mp4',
1375 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1376 'description': 'md5:e03b909557865076822aa169218d6a5d',
1377 'duration': 10643,
1378 'upload_date': '20161111',
1379 'uploader': 'Team PGP',
1380 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1381 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1382 },
1383 }, {
1384 'info_dict': {
1385 'id': '3AKt1R1aDnw',
1386 'ext': 'mp4',
1387 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1388 'description': 'md5:e03b909557865076822aa169218d6a5d',
1389 'duration': 10991,
1390 'upload_date': '20161111',
1391 'uploader': 'Team PGP',
1392 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1393 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1394 },
1395 }, {
1396 'info_dict': {
1397 'id': 'RtAMM00gpVc',
1398 'ext': 'mp4',
1399 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1400 'description': 'md5:e03b909557865076822aa169218d6a5d',
1401 'duration': 10995,
1402 'upload_date': '20161111',
1403 'uploader': 'Team PGP',
1404 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1405 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1406 },
1407 }, {
1408 'info_dict': {
1409 'id': '6N2fdlP3C5U',
1410 'ext': 'mp4',
1411 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1412 'description': 'md5:e03b909557865076822aa169218d6a5d',
1413 'duration': 10990,
1414 'upload_date': '20161111',
1415 'uploader': 'Team PGP',
1416 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1417 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1418 },
1419 }],
1420 'params': {
1421 'skip_download': True,
1422 },
1423 'skip': 'Not multifeed anymore',
1424 },
1425 {
1426 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1427 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1428 'info_dict': {
1429 'id': 'gVfLd0zydlo',
1430 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1431 },
1432 'playlist_count': 2,
1433 'skip': 'Not multifeed anymore',
1434 },
1435 {
1436 'url': 'https://vid.plus/FlRa-iH7PGw',
1437 'only_matching': True,
1438 },
1439 {
1440 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1441 'only_matching': True,
1442 },
1443 {
1444 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1445 # Also tests cut-off URL expansion in video description (see
1446 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1447 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1448 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1449 'info_dict': {
1450 'id': 'lsguqyKfVQg',
1451 'ext': 'mp4',
1452 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1453 'alt_title': 'Dark Walk',
1454 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1455 'duration': 133,
1456 'upload_date': '20151119',
1457 'uploader_id': 'IronSoulElf',
1458 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1459 'uploader': 'IronSoulElf',
1460 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1461 'track': 'Dark Walk',
1462 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1463 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1464 },
1465 'params': {
1466 'skip_download': True,
1467 },
1468 },
1469 {
1470 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1471 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1472 'only_matching': True,
1473 },
1474 {
1475 # Video with yt:stretch=17:0
1476 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1477 'info_dict': {
1478 'id': 'Q39EVAstoRM',
1479 'ext': 'mp4',
1480 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1481 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1482 'upload_date': '20151107',
1483 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1484 'uploader': 'CH GAMER DROID',
1485 },
1486 'params': {
1487 'skip_download': True,
1488 },
1489 'skip': 'This video does not exist.',
1490 },
1491 {
1492 # Video with incomplete 'yt:stretch=16:'
1493 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1494 'only_matching': True,
1495 },
1496 {
1497 # Video licensed under Creative Commons
1498 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1499 'info_dict': {
1500 'id': 'M4gD1WSo5mA',
1501 'ext': 'mp4',
1502 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1503 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1504 'duration': 721,
1505 'upload_date': '20150127',
1506 'uploader_id': 'BerkmanCenter',
1507 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1508 'uploader': 'The Berkman Klein Center for Internet & Society',
1509 'license': 'Creative Commons Attribution license (reuse allowed)',
1510 },
1511 'params': {
1512 'skip_download': True,
1513 },
1514 },
1515 {
1516 # Channel-like uploader_url
1517 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1518 'info_dict': {
1519 'id': 'eQcmzGIKrzg',
1520 'ext': 'mp4',
1521 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1522 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1523 'duration': 4060,
1524 'upload_date': '20151119',
1525 'uploader': 'Bernie Sanders',
1526 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1527 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1528 'license': 'Creative Commons Attribution license (reuse allowed)',
1529 },
1530 'params': {
1531 'skip_download': True,
1532 },
1533 },
1534 {
1535 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1536 'only_matching': True,
1537 },
1538 {
1539 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1540 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1541 'only_matching': True,
1542 },
1543 {
1544 # Rental video preview
1545 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1546 'info_dict': {
1547 'id': 'uGpuVWrhIzE',
1548 'ext': 'mp4',
1549 'title': 'Piku - Trailer',
1550 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1551 'upload_date': '20150811',
1552 'uploader': 'FlixMatrix',
1553 'uploader_id': 'FlixMatrixKaravan',
1554 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1555 'license': 'Standard YouTube License',
1556 },
1557 'params': {
1558 'skip_download': True,
1559 },
1560 'skip': 'This video is not available.',
1561 },
1562 {
1563 # YouTube Red video with episode data
1564 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1565 'info_dict': {
1566 'id': 'iqKdEhx-dD4',
1567 'ext': 'mp4',
1568 'title': 'Isolation - Mind Field (Ep 1)',
1569 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1570 'duration': 2085,
1571 'upload_date': '20170118',
1572 'uploader': 'Vsauce',
1573 'uploader_id': 'Vsauce',
1574 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1575 'series': 'Mind Field',
1576 'season_number': 1,
1577 'episode_number': 1,
1578 },
1579 'params': {
1580 'skip_download': True,
1581 },
1582 'expected_warnings': [
1583 'Skipping DASH manifest',
1584 ],
1585 },
1586 {
1587 # The following content has been identified by the YouTube community
1588 # as inappropriate or offensive to some audiences.
1589 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1590 'info_dict': {
1591 'id': '6SJNVb0GnPI',
1592 'ext': 'mp4',
1593 'title': 'Race Differences in Intelligence',
1594 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1595 'duration': 965,
1596 'upload_date': '20140124',
1597 'uploader': 'New Century Foundation',
1598 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1599 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1600 },
1601 'params': {
1602 'skip_download': True,
1603 },
1604 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1605 },
1606 {
1607 # itag 212
1608 'url': '1t24XAntNCY',
1609 'only_matching': True,
1610 },
1611 {
1612 # geo restricted to JP
1613 'url': 'sJL6WA-aGkQ',
1614 'only_matching': True,
1615 },
1616 {
1617 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1618 'only_matching': True,
1619 },
1620 {
1621 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1622 'only_matching': True,
1623 },
1624 {
1625 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1626 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1627 'only_matching': True,
1628 },
1629 {
1630 # DRM protected
1631 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1632 'only_matching': True,
1633 },
1634 {
1635 # Video with unsupported adaptive stream type formats
1636 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1637 'info_dict': {
1638 'id': 'Z4Vy8R84T1U',
1639 'ext': 'mp4',
1640 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1641 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1642 'duration': 433,
1643 'upload_date': '20130923',
1644 'uploader': 'Amelia Putri Harwita',
1645 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1646 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1647 'formats': 'maxcount:10',
1648 },
1649 'params': {
1650 'skip_download': True,
1651 'youtube_include_dash_manifest': False,
1652 },
1653 'skip': 'not actual anymore',
1654 },
1655 {
1656 # Youtube Music Auto-generated description
1657 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1658 'info_dict': {
1659 'id': 'MgNrAu2pzNs',
1660 'ext': 'mp4',
1661 'title': 'Voyeur Girl',
1662 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1663 'upload_date': '20190312',
1664 'uploader': 'Stephen - Topic',
1665 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1666 'artist': 'Stephen',
1667 'track': 'Voyeur Girl',
1668 'album': 'it\'s too much love to know my dear',
1669 'release_date': '20190313',
1670 'release_year': 2019,
1671 },
1672 'params': {
1673 'skip_download': True,
1674 },
1675 },
1676 {
1677 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1678 'only_matching': True,
1679 },
1680 {
1681 # invalid -> valid video id redirection
1682 'url': 'DJztXj2GPfl',
1683 'info_dict': {
1684 'id': 'DJztXj2GPfk',
1685 'ext': 'mp4',
1686 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1687 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1688 'upload_date': '20090125',
1689 'uploader': 'Prochorowka',
1690 'uploader_id': 'Prochorowka',
1691 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1692 'artist': 'Panjabi MC',
1693 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1694 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1695 },
1696 'params': {
1697 'skip_download': True,
1698 },
1699 'skip': 'Video unavailable',
1700 },
1701 {
1702 # empty description results in an empty string
1703 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1704 'info_dict': {
1705 'id': 'x41yOUIvK2k',
1706 'ext': 'mp4',
1707 'title': 'IMG 3456',
1708 'description': '',
1709 'upload_date': '20170613',
1710 'uploader_id': 'ElevageOrVert',
1711 'uploader': 'ElevageOrVert',
1712 },
1713 'params': {
1714 'skip_download': True,
1715 },
1716 },
1717 {
1718 # with '};' inside yt initial data (see [1])
1719 # see [2] for an example with '};' inside ytInitialPlayerResponse
1720 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1721 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1722 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1723 'info_dict': {
1724 'id': 'CHqg6qOn4no',
1725 'ext': 'mp4',
1726 'title': 'Part 77 Sort a list of simple types in c#',
1727 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1728 'upload_date': '20130831',
1729 'uploader_id': 'kudvenkat',
1730 'uploader': 'kudvenkat',
1731 },
1732 'params': {
1733 'skip_download': True,
1734 },
1735 },
1736 {
1737 # another example of '};' in ytInitialData
1738 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1739 'only_matching': True,
1740 },
1741 {
1742 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1743 'only_matching': True,
1744 },
1745 {
1746 # https://github.com/ytdl-org/youtube-dl/pull/28094
1747 'url': 'OtqTfy26tG0',
1748 'info_dict': {
1749 'id': 'OtqTfy26tG0',
1750 'ext': 'mp4',
1751 'title': 'Burn Out',
1752 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1753 'upload_date': '20141120',
1754 'uploader': 'The Cinematic Orchestra - Topic',
1755 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1756 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1757 'artist': 'The Cinematic Orchestra',
1758 'track': 'Burn Out',
1759 'album': 'Every Day',
1760 'release_data': None,
1761 'release_year': None,
1762 },
1763 'params': {
1764 'skip_download': True,
1765 },
1766 },
1767 {
1768 # controversial video, only works with bpctr when authenticated with cookies
1769 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1770 'only_matching': True,
1771 },
1772 {
1773 # controversial video, requires bpctr/contentCheckOk
1774 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1775 'info_dict': {
1776 'id': 'SZJvDhaSDnc',
1777 'ext': 'mp4',
1778 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1779 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1780 'uploader': 'CBS This Morning',
1781 'uploader_id': 'CBSThisMorning',
1782 'upload_date': '20140716',
1783 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1784 }
1785 },
1786 {
1787 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1788 'url': 'cBvYw8_A0vQ',
1789 'info_dict': {
1790 'id': 'cBvYw8_A0vQ',
1791 'ext': 'mp4',
1792 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1793 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1794 'upload_date': '20201120',
1795 'uploader': 'Walk around Japan',
1796 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1797 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1798 },
1799 'params': {
1800 'skip_download': True,
1801 },
1802 }, {
1803 # Has multiple audio streams
1804 'url': 'WaOKSUlf4TM',
1805 'only_matching': True
1806 }, {
1807 # Requires Premium: has format 141 when requested using YTM url
1808 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1809 'only_matching': True
1810 }, {
1811 # multiple subtitles with same lang_code
1812 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1813 'only_matching': True,
1814 }, {
1815 # Force use android client fallback
1816 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1817 'info_dict': {
1818 'id': 'YOelRv7fMxY',
1819 'title': 'DIGGING A SECRET TUNNEL Part 1',
1820 'ext': '3gp',
1821 'upload_date': '20210624',
1822 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1823 'uploader': 'colinfurze',
1824 'uploader_id': 'colinfurze',
1825 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1826 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1827 },
1828 'params': {
1829 'format': '17', # 3gp format available on android
1830 'extractor_args': {'youtube': {'player_client': ['android']}},
1831 },
1832 },
1833 {
1834 # Skip download of additional client configs (remix client config in this case)
1835 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1836 'only_matching': True,
1837 'params': {
1838 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1839 },
1840 }, {
1841 # shorts
1842 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1843 'only_matching': True,
1844 },
1845 ]
1846
1847 @classmethod
1848 def suitable(cls, url):
1849 from ..utils import parse_qs
1850
1851 qs = parse_qs(url)
1852 if qs.get('list', [None])[0]:
1853 return False
1854 return super(YoutubeIE, cls).suitable(url)
1855
1856 def __init__(self, *args, **kwargs):
1857 super(YoutubeIE, self).__init__(*args, **kwargs)
1858 self._code_cache = {}
1859 self._player_cache = {}
1860
1861 def _extract_player_url(self, ytcfg=None, webpage=None):
1862 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1863 if not player_url and webpage:
1864 player_url = self._search_regex(
1865 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1866 webpage, 'player URL', fatal=False)
1867 if not player_url:
1868 return None
1869 if player_url.startswith('//'):
1870 player_url = 'https:' + player_url
1871 elif not re.match(r'https?://', player_url):
1872 player_url = compat_urlparse.urljoin(
1873 'https://www.youtube.com', player_url)
1874 return player_url
1875
1876 def _signature_cache_id(self, example_sig):
1877 """ Return a string representation of a signature """
1878 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1879
1880 @classmethod
1881 def _extract_player_info(cls, player_url):
1882 for player_re in cls._PLAYER_INFO_RE:
1883 id_m = re.search(player_re, player_url)
1884 if id_m:
1885 break
1886 else:
1887 raise ExtractorError('Cannot identify player %r' % player_url)
1888 return id_m.group('id')
1889
1890 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1891 player_id = self._extract_player_info(player_url)
1892 if player_id not in self._code_cache:
1893 self._code_cache[player_id] = self._download_webpage(
1894 player_url, video_id, fatal=fatal,
1895 note='Downloading player ' + player_id,
1896 errnote='Download of %s failed' % player_url)
1897 return player_id in self._code_cache
1898
1899 def _extract_signature_function(self, video_id, player_url, example_sig):
1900 player_id = self._extract_player_info(player_url)
1901
1902 # Read from filesystem cache
1903 func_id = 'js_%s_%s' % (
1904 player_id, self._signature_cache_id(example_sig))
1905 assert os.path.basename(func_id) == func_id
1906
1907 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1908 if cache_spec is not None:
1909 return lambda s: ''.join(s[i] for i in cache_spec)
1910
1911 if self._load_player(video_id, player_url):
1912 code = self._code_cache[player_id]
1913 res = self._parse_sig_js(code)
1914
1915 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1916 cache_res = res(test_string)
1917 cache_spec = [ord(c) for c in cache_res]
1918
1919 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1920 return res
1921
    def _print_sig_code(self, func, example_sig):
        """
        Pretty-print the extracted signature function as equivalent Python
        slicing code (used with the youtube_print_sig_code option).
        """
        def gen_sig_code(idxs):
            # Compress a list of source-character indices into python
            # index/slice expressions over the input string `s`.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # A slice run is open: extend it while the stride holds,
                    # otherwise emit it and close the run.
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Two adjacent indices start a new +-1 stride run.
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush whatever the loop left pending (single index or open run).
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Probe func with a string of distinct characters to recover the
        # permutation of indices that it applies.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1960
    def _parse_sig_js(self, jscode):
        """
        Locate the signature-decryption function in the player JS and return
        a Python callable that invokes it through JSInterpreter.
        """
        # The function name is found by matching known call-site patterns;
        # patterns are tried in order, with obsolete ones kept as fallbacks.
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes the signature as its single argument.
        return lambda s: initial_function([s])
1984
1985 def _decrypt_signature(self, s, video_id, player_url):
1986 """Turn the encrypted s field into a working signature"""
1987
1988 if player_url is None:
1989 raise ExtractorError('Cannot decrypt signature without player_url')
1990
1991 try:
1992 player_id = (player_url, self._signature_cache_id(s))
1993 if player_id not in self._player_cache:
1994 func = self._extract_signature_function(
1995 video_id, player_url, s
1996 )
1997 self._player_cache[player_id] = func
1998 func = self._player_cache[player_id]
1999 if self.get_param('youtube_print_sig_code'):
2000 self._print_sig_code(func, s)
2001 return func(s)
2002 except Exception as e:
2003 tb = traceback.format_exc()
2004 raise ExtractorError(
2005 'Signature extraction failed: ' + tb, cause=e)
2006
2007 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2008 """
2009 Extract signatureTimestamp (sts)
2010 Required to tell API what sig/player version is in use.
2011 """
2012 sts = None
2013 if isinstance(ytcfg, dict):
2014 sts = int_or_none(ytcfg.get('STS'))
2015
2016 if not sts:
2017 # Attempt to extract from player
2018 if player_url is None:
2019 error_msg = 'Cannot extract signature timestamp without player_url.'
2020 if fatal:
2021 raise ExtractorError(error_msg)
2022 self.report_warning(error_msg)
2023 return
2024 if self._load_player(video_id, player_url, fatal=fatal):
2025 player_id = self._extract_player_info(player_url)
2026 code = self._code_cache[player_id]
2027 sts = int_or_none(self._search_regex(
2028 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2029 'JS player signature timestamp', group='sts', fatal=fatal))
2030 return sts
2031
2032 def _mark_watched(self, video_id, player_responses):
2033 playback_url = traverse_obj(
2034 player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2035 expected_type=url_or_none, get_all=False)
2036 if not playback_url:
2037 self.report_warning('Unable to mark watched')
2038 return
2039 parsed_playback_url = compat_urlparse.urlparse(playback_url)
2040 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2041
2042 # cpn generation algorithm is reverse engineered from base.js.
2043 # In fact it works even with dummy cpn.
2044 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2045 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2046
2047 qs.update({
2048 'ver': ['2'],
2049 'cpn': [cpn],
2050 })
2051 playback_url = compat_urlparse.urlunparse(
2052 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2053
2054 self._download_webpage(
2055 playback_url, video_id, 'Marking watched',
2056 'Unable to mark watched', fatal=False)
2057
2058 @staticmethod
2059 def _extract_urls(webpage):
2060 # Embedded YouTube player
2061 entries = [
2062 unescapeHTML(mobj.group('url'))
2063 for mobj in re.finditer(r'''(?x)
2064 (?:
2065 <iframe[^>]+?src=|
2066 data-video-url=|
2067 <embed[^>]+?src=|
2068 embedSWF\(?:\s*|
2069 <object[^>]+data=|
2070 new\s+SWFObject\(
2071 )
2072 (["\'])
2073 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2074 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2075 \1''', webpage)]
2076
2077 # lazyYT YouTube embed
2078 entries.extend(list(map(
2079 unescapeHTML,
2080 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2081
2082 # Wordpress "YouTube Video Importer" plugin
2083 matches = re.findall(r'''(?x)<div[^>]+
2084 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2085 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2086 entries.extend(m[-1] for m in matches)
2087
2088 return entries
2089
2090 @staticmethod
2091 def _extract_url(webpage):
2092 urls = YoutubeIE._extract_urls(webpage)
2093 return urls[0] if urls else None
2094
2095 @classmethod
2096 def extract_id(cls, url):
2097 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2098 if mobj is None:
2099 raise ExtractorError('Invalid URL: %s' % url)
2100 return mobj.group('id')
2101
2102 def _extract_chapters_from_json(self, data, duration):
2103 chapter_list = traverse_obj(
2104 data, (
2105 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2106 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2107 ), expected_type=list)
2108
2109 return self._extract_chapters(
2110 chapter_list,
2111 chapter_time=lambda chapter: float_or_none(
2112 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2113 chapter_title=lambda chapter: traverse_obj(
2114 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2115 duration=duration)
2116
2117 def _extract_chapters_from_engagement_panel(self, data, duration):
2118 content_list = traverse_obj(
2119 data,
2120 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2121 expected_type=list, default=[])
2122 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2123 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2124
2125 return next((
2126 filter(None, (
2127 self._extract_chapters(
2128 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2129 chapter_time, chapter_title, duration)
2130 for contents in content_list
2131 ))), [])
2132
2133 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2134 chapters = []
2135 last_chapter = {'start_time': 0}
2136 for idx, chapter in enumerate(chapter_list or []):
2137 title = chapter_title(chapter)
2138 start_time = chapter_time(chapter)
2139 if start_time is None:
2140 continue
2141 last_chapter['end_time'] = start_time
2142 if start_time < last_chapter['start_time']:
2143 if idx == 1:
2144 chapters.pop()
2145 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2146 else:
2147 self.report_warning(f'Invalid start time for chapter "{title}"')
2148 continue
2149 last_chapter = {'start_time': start_time, 'title': title}
2150 chapters.append(last_chapter)
2151 last_chapter['end_time'] = duration
2152 return chapters
2153
2154 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2155 return self._parse_json(self._search_regex(
2156 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2157 regex), webpage, name, default='{}'), video_id, fatal=False)
2158
2159 @staticmethod
2160 def parse_time_text(time_text):
2161 """
2162 Parse the comment time text
2163 time_text is in the format 'X units ago (edited)'
2164 """
2165 time_text_split = time_text.split(' ')
2166 if len(time_text_split) >= 3:
2167 try:
2168 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2169 except ValueError:
2170 return None
2171
2172 def _extract_comment(self, comment_renderer, parent=None):
2173 comment_id = comment_renderer.get('commentId')
2174 if not comment_id:
2175 return
2176
2177 text = self._get_text(comment_renderer, 'contentText')
2178
2179 # note: timestamp is an estimate calculated from the current time and time_text
2180 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2181 time_text_dt = self.parse_time_text(time_text)
2182 if isinstance(time_text_dt, datetime.datetime):
2183 timestamp = calendar.timegm(time_text_dt.timetuple())
2184 author = self._get_text(comment_renderer, 'authorText')
2185 author_id = try_get(comment_renderer,
2186 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2187
2188 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2189 lambda x: x['likeCount']), compat_str)) or 0
2190 author_thumbnail = try_get(comment_renderer,
2191 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2192
2193 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2194 is_favorited = 'creatorHeart' in (try_get(
2195 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2196 return {
2197 'id': comment_id,
2198 'text': text,
2199 'timestamp': timestamp,
2200 'time_text': time_text,
2201 'like_count': votes,
2202 'is_favorited': is_favorited,
2203 'author': author,
2204 'author_id': author_id,
2205 'author_thumbnail': author_thumbnail,
2206 'author_is_uploader': author_is_uploader,
2207 'parent': parent or 'root'
2208 }
2209
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """
        Generator of comment info dicts for a comment section (or reply thread).

        May yield a plain int first: the estimated total comment count parsed
        from the section header. comment_counts is a shared mutable list of
        [comments so far, estimated total, current reply-thread index] that is
        threaded through recursive calls for reply threads (parent is then the
        parent comment id).
        """
        def extract_header(contents):
            # Parse the comments-section header: emits the estimated total and
            # the continuation for the selected sort order (top/new).
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment in a thread, recursing into its replies.
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        # Tokens shorter than 27 chars come from the pre-innertube API and must
        # be regenerated in the new format.
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the comment continuations until exhausted.
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    ' ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry the visitorData forward so subsequent pages share a session.
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2380
2381 @staticmethod
2382 def _generate_comment_continuation(video_id):
2383 """
2384 Generates initial comment section continuation token from given video id
2385 """
2386 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2387 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2388 new_continuation_intlist = list(itertools.chain.from_iterable(
2389 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2390 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2391
    def _extract_comments(self, ytcfg, video_id, contents, webpage):
        """Entry for comment extraction"""
        def _real_comment_extract(contents):
            # Find the first known comment renderer and delegate to the
            # _comment_entries generator (which may yield an int estimate first).
            if isinstance(contents, list):
                for entry in contents:
                    for key, renderer in entry.items():
                        if key not in known_entry_comment_renderers:
                            continue
                        yield from self._comment_entries(
                            renderer, video_id=video_id, ytcfg=ytcfg,
                            identity_token=self._extract_identity_token(webpage, item_id=video_id),
                            account_syncid=self._extract_account_syncid(ytcfg))
                        break
        comments = []
        known_entry_comment_renderers = ('itemSectionRenderer',)
        estimated_total = 0
        # max_comments caps how many comments are collected (inf = no limit)
        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
        # Force English regardless of account setting to prevent parsing issues
        # See: https://github.com/yt-dlp/yt-dlp/issues/532
        ytcfg = copy.deepcopy(ytcfg)
        # NOTE(review): if the INNERTUBE_CONTEXT.client path is missing, 'hl'
        # is set on the throwaway default dict - presumably intended as
        # best-effort; confirm traverse_obj default semantics.
        traverse_obj(
            ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
        try:
            for comment in _real_comment_extract(contents):
                if len(comments) >= max_comments:
                    break
                # An int item is the estimated total, not a comment dict
                if isinstance(comment, int):
                    estimated_total = comment
                    continue
                comments.append(comment)
        except KeyboardInterrupt:
            # Allow the user to abort comment extraction and keep what we have
            self.to_screen('Interrupted by user')
        self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
        return {
            'comments': comments,
            'comment_count': len(comments),
        }
2429
2430 @staticmethod
2431 def _generate_player_context(sts=None):
2432 context = {
2433 'html5Preference': 'HTML5_PREF_WANTS',
2434 }
2435 if sts is not None:
2436 context['signatureTimestamp'] = sts
2437 return {
2438 'playbackContext': {
2439 'contentPlaybackContext': context
2440 },
2441 'contentCheckOk': True,
2442 'racyCheckOk': True
2443 }
2444
2445 @staticmethod
2446 def _is_agegated(player_response):
2447 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2448 return True
2449
2450 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2451 AGE_GATE_REASONS = (
2452 'confirm your age', 'age-restricted', 'inappropriate', # reason
2453 'age_verification_required', 'age_check_required', # status
2454 )
2455 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2456
2457 @staticmethod
2458 def _is_unplayable(player_response):
2459 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2460
2461 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
2462
2463 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2464 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2465 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
2466 headers = self.generate_api_headers(
2467 player_ytcfg, identity_token, syncid,
2468 default_client=client, session_index=session_index)
2469
2470 yt_query = {'videoId': video_id}
2471 yt_query.update(self._generate_player_context(sts))
2472 return self._extract_response(
2473 item_id=video_id, ep='player', query=yt_query,
2474 ytcfg=player_ytcfg, headers=headers, fatal=True,
2475 default_client=client,
2476 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2477 ) or None
2478
2479 def _get_requested_clients(self, url, smuggled_data):
2480 requested_clients = []
2481 allowed_clients = sorted(
2482 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2483 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2484 for client in self._configuration_arg('player_client'):
2485 if client in allowed_clients:
2486 requested_clients.append(client)
2487 elif client == 'all':
2488 requested_clients.extend(allowed_clients)
2489 else:
2490 self.report_warning(f'Skipping unsupported client {client}')
2491 if not requested_clients:
2492 requested_clients = ['android', 'web']
2493
2494 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2495 requested_clients.extend(
2496 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2497
2498 return orderedSet(requested_clients)
2499
2500 def _extract_player_ytcfg(self, client, video_id):
2501 url = {
2502 'web_music': 'https://music.youtube.com',
2503 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2504 }.get(client)
2505 if not url:
2506 return {}
2507 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2508 return self.extract_ytcfg(video_id, webpage) or {}
2509
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
        """Yield player response dicts for each requested client.

        Clients are processed in priority order; agegated/unplayable responses
        may enqueue additional fallback clients (*_agegate / *_creator).  If
        every client errored and nothing was yielded, the last error is raised.
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        # Treat `clients` as a stack: reverse so pop() returns them in the
        # originally requested order. `original_clients` is kept so fallback
        # clients are only added when the user did not explicitly request them.
        original_clients = clients
        clients = clients[::-1]

        def append_client(client_name):
            # Enqueue a fallback client unless unknown or already requested
            if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
                clients.append(client_name)

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        yielded_pr = False
        if initial_pr:
            pr = dict(initial_pr)
            pr['streamingData'] = None
            yielded_pr = True
            yield pr

        last_error = None
        while clients:
            client = clients.pop()
            # The webpage's ytcfg only applies to the plain 'web' client
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip'):
                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

            try:
                # Reuse the webpage's player response for 'web' instead of re-requesting
                pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
            except ExtractorError as e:
                # Defer: only warn about a previous error; the newest one may
                # still be raised below if no client succeeds
                if last_error:
                    self.report_warning(last_error)
                last_error = e
                continue

            if pr:
                yielded_pr = True
                yield pr

            # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
            if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header():
                append_client(client.replace('_agegate', '_creator'))
            elif self._is_agegated(pr):
                append_client(f'{client}_agegate')

        if last_error:
            if not yielded_pr:
                raise last_error
            self.report_warning(last_error)
2565
2566 def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2567 itags, stream_ids = [], []
2568 itag_qualities, res_qualities = {}, {}
2569 q = qualities([
2570 # Normally tiny is the smallest video-only formats. But
2571 # audio-only formats with unknown quality may get tagged as tiny
2572 'tiny',
2573 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2574 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2575 ])
2576 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2577
2578 for fmt in streaming_formats:
2579 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2580 continue
2581
2582 itag = str_or_none(fmt.get('itag'))
2583 audio_track = fmt.get('audioTrack') or {}
2584 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2585 if stream_id in stream_ids:
2586 continue
2587
2588 quality = fmt.get('quality')
2589 height = int_or_none(fmt.get('height'))
2590 if quality == 'tiny' or not quality:
2591 quality = fmt.get('audioQuality', '').lower() or quality
2592 # The 3gp format (17) in android client has a quality of "small",
2593 # but is actually worse than other formats
2594 if itag == '17':
2595 quality = 'tiny'
2596 if quality:
2597 if itag:
2598 itag_qualities[itag] = quality
2599 if height:
2600 res_qualities[height] = quality
2601 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2602 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2603 # number of fragment that would subsequently requested with (`&sq=N`)
2604 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2605 continue
2606
2607 fmt_url = fmt.get('url')
2608 if not fmt_url:
2609 sc = compat_parse_qs(fmt.get('signatureCipher'))
2610 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2611 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2612 if not (sc and fmt_url and encrypted_sig):
2613 continue
2614 if not player_url:
2615 continue
2616 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2617 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2618 fmt_url += '&' + sp + '=' + signature
2619
2620 if itag:
2621 itags.append(itag)
2622 stream_ids.append(stream_id)
2623
2624 tbr = float_or_none(
2625 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2626 dct = {
2627 'asr': int_or_none(fmt.get('audioSampleRate')),
2628 'filesize': int_or_none(fmt.get('contentLength')),
2629 'format_id': itag,
2630 'format_note': ', '.join(filter(None, (
2631 '%s%s' % (audio_track.get('displayName') or '',
2632 ' (default)' if audio_track.get('audioIsDefault') else ''),
2633 fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
2634 'fps': int_or_none(fmt.get('fps')),
2635 'height': height,
2636 'quality': q(quality),
2637 'tbr': tbr,
2638 'url': fmt_url,
2639 'width': int_or_none(fmt.get('width')),
2640 'language': audio_track.get('id', '').split('.')[0],
2641 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
2642 }
2643 mime_mobj = re.match(
2644 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2645 if mime_mobj:
2646 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2647 dct.update(parse_codecs(mime_mobj.group(2)))
2648 no_audio = dct.get('acodec') == 'none'
2649 no_video = dct.get('vcodec') == 'none'
2650 if no_audio:
2651 dct['vbr'] = tbr
2652 if no_video:
2653 dct['abr'] = tbr
2654 if no_audio or no_video:
2655 dct['downloader_options'] = {
2656 # Youtube throttles chunks >~10M
2657 'http_chunk_size': 10485760,
2658 }
2659 if dct.get('ext'):
2660 dct['container'] = dct['ext'] + '_dash'
2661 yield dct
2662
2663 skip_manifests = self._configuration_arg('skip')
2664 get_dash = (
2665 (not is_live or self._configuration_arg('include_live_dash'))
2666 and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
2667 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2668
2669 def guess_quality(f):
2670 for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
2671 if val in qdict:
2672 return q(qdict[val])
2673 return -1
2674
2675 for sd in streaming_data:
2676 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2677 if hls_manifest_url:
2678 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2679 itag = self._search_regex(
2680 r'/itag/(\d+)', f['url'], 'itag', default=None)
2681 if itag in itags:
2682 continue
2683 if itag:
2684 f['format_id'] = itag
2685 itags.append(itag)
2686 f['quality'] = guess_quality(f)
2687 yield f
2688
2689 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2690 if dash_manifest_url:
2691 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2692 itag = f['format_id']
2693 if itag in itags:
2694 continue
2695 if itag:
2696 itags.append(itag)
2697 f['quality'] = guess_quality(f)
2698 filesize = int_or_none(self._search_regex(
2699 r'/clen/(\d+)', f.get('fragment_base_url')
2700 or f['url'], 'file size', default=None))
2701 if filesize:
2702 f['filesize'] = filesize
2703 yield f
2704
2705 def _real_extract(self, url):
2706 url, smuggled_data = unsmuggle_url(url, {})
2707 video_id = self._match_id(url)
2708
2709 base_url = self.http_scheme() + '//www.youtube.com/'
2710 webpage_url = base_url + 'watch?v=' + video_id
2711 webpage = self._download_webpage(
2712 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2713
2714 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2715 player_url = self._extract_player_url(master_ytcfg, webpage)
2716 identity_token = self._extract_identity_token(webpage, video_id)
2717
2718 player_responses = list(self._extract_player_responses(
2719 self._get_requested_clients(url, smuggled_data),
2720 video_id, webpage, master_ytcfg, player_url, identity_token))
2721
2722 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2723
2724 playability_statuses = traverse_obj(
2725 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2726
2727 trailer_video_id = get_first(
2728 playability_statuses,
2729 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2730 expected_type=str)
2731 if trailer_video_id:
2732 return self.url_result(
2733 trailer_video_id, self.ie_key(), trailer_video_id)
2734
2735 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2736 if webpage else (lambda x: None))
2737
2738 video_details = traverse_obj(
2739 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2740 microformats = traverse_obj(
2741 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2742 expected_type=dict, default=[])
2743 video_title = (
2744 get_first(video_details, 'title')
2745 or self._get_text(microformats, (..., 'title'))
2746 or search_meta(['og:title', 'twitter:title', 'title']))
2747 video_description = get_first(video_details, 'shortDescription')
2748
2749 if not smuggled_data.get('force_singlefeed', False):
2750 if not self.get_param('noplaylist'):
2751 multifeed_metadata_list = get_first(
2752 player_responses,
2753 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2754 expected_type=str)
2755 if multifeed_metadata_list:
2756 entries = []
2757 feed_ids = []
2758 for feed in multifeed_metadata_list.split(','):
2759 # Unquote should take place before split on comma (,) since textual
2760 # fields may contain comma as well (see
2761 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2762 feed_data = compat_parse_qs(
2763 compat_urllib_parse_unquote_plus(feed))
2764
2765 def feed_entry(name):
2766 return try_get(
2767 feed_data, lambda x: x[name][0], compat_str)
2768
2769 feed_id = feed_entry('id')
2770 if not feed_id:
2771 continue
2772 feed_title = feed_entry('title')
2773 title = video_title
2774 if feed_title:
2775 title += ' (%s)' % feed_title
2776 entries.append({
2777 '_type': 'url_transparent',
2778 'ie_key': 'Youtube',
2779 'url': smuggle_url(
2780 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2781 {'force_singlefeed': True}),
2782 'title': title,
2783 })
2784 feed_ids.append(feed_id)
2785 self.to_screen(
2786 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2787 % (', '.join(feed_ids), video_id))
2788 return self.playlist_result(
2789 entries, video_id, video_title, video_description)
2790 else:
2791 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2792
2793 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2794 is_live = get_first(video_details, 'isLive')
2795 if is_live is None:
2796 is_live = get_first(live_broadcast_details, 'isLiveNow')
2797
2798 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2799 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2800
2801 if not formats:
2802 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2803 self.report_drm(video_id)
2804 pemr = get_first(
2805 playability_statuses,
2806 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2807 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2808 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2809 if subreason:
2810 if subreason == 'The uploader has not made this video available in your country.':
2811 countries = get_first(microformats, 'availableCountries')
2812 if not countries:
2813 regions_allowed = search_meta('regionsAllowed')
2814 countries = regions_allowed.split(',') if regions_allowed else None
2815 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2816 reason += f'. {subreason}'
2817 if reason:
2818 self.raise_no_formats(reason, expected=True)
2819
2820 for f in formats:
2821 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
2822 f['source_preference'] = -10
2823 # TODO: this method is not reliable
2824 f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
2825
2826 # Source is given priority since formats that throttle are given lower source_preference
2827 # When throttling issue is fully fixed, remove this
2828 self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang'))
2829
2830 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2831 if not keywords and webpage:
2832 keywords = [
2833 unescapeHTML(m.group('content'))
2834 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2835 for keyword in keywords:
2836 if keyword.startswith('yt:stretch='):
2837 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2838 if mobj:
2839 # NB: float is intentional for forcing float division
2840 w, h = (float(v) for v in mobj.groups())
2841 if w > 0 and h > 0:
2842 ratio = w / h
2843 for f in formats:
2844 if f.get('vcodec') != 'none':
2845 f['stretched_ratio'] = ratio
2846 break
2847
2848 thumbnails = []
2849 thumbnail_dicts = traverse_obj(
2850 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2851 expected_type=dict, default=[])
2852 for thumbnail in thumbnail_dicts:
2853 thumbnail_url = thumbnail.get('url')
2854 if not thumbnail_url:
2855 continue
2856 # Sometimes youtube gives a wrong thumbnail URL. See:
2857 # https://github.com/yt-dlp/yt-dlp/issues/233
2858 # https://github.com/ytdl-org/youtube-dl/issues/28023
2859 if 'maxresdefault' in thumbnail_url:
2860 thumbnail_url = thumbnail_url.split('?')[0]
2861 thumbnails.append({
2862 'url': thumbnail_url,
2863 'height': int_or_none(thumbnail.get('height')),
2864 'width': int_or_none(thumbnail.get('width')),
2865 })
2866 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2867 if thumbnail_url:
2868 thumbnails.append({
2869 'url': thumbnail_url,
2870 })
2871 # The best resolution thumbnails sometimes does not appear in the webpage
2872 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2873 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2874 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2875 # TODO: Test them also? - For some videos, even these don't exist
2876 guaranteed_thumbnail_names = [
2877 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2878 'mqdefault', 'mq1', 'mq2', 'mq3',
2879 'default', '1', '2', '3'
2880 ]
2881 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2882 n_thumbnail_names = len(thumbnail_names)
2883
2884 thumbnails.extend({
2885 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2886 video_id=video_id, name=name, ext=ext,
2887 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2888 '_test_url': name in hq_thumbnail_names,
2889 } for name in thumbnail_names for ext in ('webp', 'jpg'))
2890 for thumb in thumbnails:
2891 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2892 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2893 self._remove_duplicate_formats(thumbnails)
2894
2895 category = get_first(microformats, 'category') or search_meta('genre')
2896 channel_id = str_or_none(
2897 get_first(video_details, 'channelId')
2898 or get_first(microformats, 'externalChannelId')
2899 or search_meta('channelId'))
2900 duration = int_or_none(
2901 get_first(video_details, 'lengthSeconds')
2902 or get_first(microformats, 'lengthSeconds')
2903 or parse_duration(search_meta('duration'))) or None
2904 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2905
2906 live_content = get_first(video_details, 'isLiveContent')
2907 is_upcoming = get_first(video_details, 'isUpcoming')
2908 if is_live is None:
2909 if is_upcoming or live_content is False:
2910 is_live = False
2911 if is_upcoming is None and (live_content or is_live):
2912 is_upcoming = False
2913 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2914 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2915 if not duration and live_endtime and live_starttime:
2916 duration = live_endtime - live_starttime
2917
2918 info = {
2919 'id': video_id,
2920 'title': self._live_title(video_title) if is_live else video_title,
2921 'formats': formats,
2922 'thumbnails': thumbnails,
2923 'description': video_description,
2924 'upload_date': unified_strdate(
2925 get_first(microformats, 'uploadDate')
2926 or search_meta('uploadDate')),
2927 'uploader': get_first(video_details, 'author'),
2928 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2929 'uploader_url': owner_profile_url,
2930 'channel_id': channel_id,
2931 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2932 'duration': duration,
2933 'view_count': int_or_none(
2934 get_first((video_details, microformats), (..., 'viewCount'))
2935 or search_meta('interactionCount')),
2936 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2937 'age_limit': 18 if (
2938 get_first(microformats, 'isFamilySafe') is False
2939 or search_meta('isFamilyFriendly') == 'false'
2940 or search_meta('og:restrictions:age') == '18+') else 0,
2941 'webpage_url': webpage_url,
2942 'categories': [category] if category else None,
2943 'tags': keywords,
2944 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2945 'is_live': is_live,
2946 'was_live': (False if is_live or is_upcoming or live_content is False
2947 else None if is_live is None or is_upcoming is None
2948 else live_content),
2949 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2950 'release_timestamp': live_starttime,
2951 }
2952
2953 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2954 # Converted into dicts to remove duplicates
2955 captions = {
2956 sub.get('baseUrl'): sub
2957 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2958 translation_languages = {
2959 lang.get('languageCode'): lang.get('languageName')
2960 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2961 subtitles = {}
2962 if pctr:
2963 def process_language(container, base_url, lang_code, sub_name, query):
2964 lang_subs = container.setdefault(lang_code, [])
2965 for fmt in self._SUBTITLE_FORMATS:
2966 query.update({
2967 'fmt': fmt,
2968 })
2969 lang_subs.append({
2970 'ext': fmt,
2971 'url': update_url_query(base_url, query),
2972 'name': sub_name,
2973 })
2974
2975 for base_url, caption_track in captions.items():
2976 if not base_url:
2977 continue
2978 if caption_track.get('kind') != 'asr':
2979 lang_code = (
2980 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2981 or caption_track.get('languageCode'))
2982 if not lang_code:
2983 continue
2984 process_language(
2985 subtitles, base_url, lang_code,
2986 traverse_obj(caption_track, ('name', 'simpleText'), ('name', 'runs', ..., 'text'), get_all=False),
2987 {})
2988 continue
2989 automatic_captions = {}
2990 for trans_code, trans_name in translation_languages.items():
2991 if not trans_code:
2992 continue
2993 process_language(
2994 automatic_captions, base_url, trans_code,
2995 self._get_text(trans_name, max_runs=1),
2996 {'tlang': trans_code})
2997 info['automatic_captions'] = automatic_captions
2998 info['subtitles'] = subtitles
2999
3000 parsed_url = compat_urllib_parse_urlparse(url)
3001 for component in [parsed_url.fragment, parsed_url.query]:
3002 query = compat_parse_qs(component)
3003 for k, v in query.items():
3004 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3005 d_k += '_time'
3006 if d_k not in info and k in s_ks:
3007 info[d_k] = parse_duration(query[k][0])
3008
3009 # Youtube Music Auto-generated description
3010 if video_description:
3011 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
3012 if mobj:
3013 release_year = mobj.group('release_year')
3014 release_date = mobj.group('release_date')
3015 if release_date:
3016 release_date = release_date.replace('-', '')
3017 if not release_year:
3018 release_year = release_date[:4]
3019 info.update({
3020 'album': mobj.group('album'.strip()),
3021 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3022 'track': mobj.group('track').strip(),
3023 'release_date': release_date,
3024 'release_year': int_or_none(release_year),
3025 })
3026
3027 initial_data = None
3028 if webpage:
3029 initial_data = self._extract_yt_initial_variable(
3030 webpage, self._YT_INITIAL_DATA_RE, video_id,
3031 'yt initial data')
3032 if not initial_data:
3033 headers = self.generate_api_headers(
3034 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
3035 session_index=self._extract_session_index(master_ytcfg))
3036
3037 initial_data = self._extract_response(
3038 item_id=video_id, ep='next', fatal=False,
3039 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
3040 note='Downloading initial data API JSON')
3041
3042 try:
3043 # This will error if there is no livechat
3044 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3045 info['subtitles']['live_chat'] = [{
3046 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3047 'video_id': video_id,
3048 'ext': 'json',
3049 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3050 }]
3051 except (KeyError, IndexError, TypeError):
3052 pass
3053
3054 if initial_data:
3055 info['chapters'] = (
3056 self._extract_chapters_from_json(initial_data, duration)
3057 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3058 or None)
3059
3060 contents = try_get(
3061 initial_data,
3062 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3063 list) or []
3064 for content in contents:
3065 vpir = content.get('videoPrimaryInfoRenderer')
3066 if vpir:
3067 stl = vpir.get('superTitleLink')
3068 if stl:
3069 stl = self._get_text(stl)
3070 if try_get(
3071 vpir,
3072 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3073 info['location'] = stl
3074 else:
3075 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3076 if mobj:
3077 info.update({
3078 'series': mobj.group(1),
3079 'season_number': int(mobj.group(2)),
3080 'episode_number': int(mobj.group(3)),
3081 })
3082 for tlb in (try_get(
3083 vpir,
3084 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3085 list) or []):
3086 tbr = tlb.get('toggleButtonRenderer') or {}
3087 for getter, regex in [(
3088 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3089 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3090 lambda x: x['accessibility'],
3091 lambda x: x['accessibilityData']['accessibilityData'],
3092 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3093 label = (try_get(tbr, getter, dict) or {}).get('label')
3094 if label:
3095 mobj = re.match(regex, label)
3096 if mobj:
3097 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3098 break
3099 sbr_tooltip = try_get(
3100 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3101 if sbr_tooltip:
3102 like_count, dislike_count = sbr_tooltip.split(' / ')
3103 info.update({
3104 'like_count': str_to_int(like_count),
3105 'dislike_count': str_to_int(dislike_count),
3106 })
3107 vsir = content.get('videoSecondaryInfoRenderer')
3108 if vsir:
3109 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3110 rows = try_get(
3111 vsir,
3112 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3113 list) or []
3114 multiple_songs = False
3115 for row in rows:
3116 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3117 multiple_songs = True
3118 break
3119 for row in rows:
3120 mrr = row.get('metadataRowRenderer') or {}
3121 mrr_title = mrr.get('title')
3122 if not mrr_title:
3123 continue
3124 mrr_title = self._get_text(mrr, 'title')
3125 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3126 if mrr_title == 'License':
3127 info['license'] = mrr_contents_text
3128 elif not multiple_songs:
3129 if mrr_title == 'Album':
3130 info['album'] = mrr_contents_text
3131 elif mrr_title == 'Artist':
3132 info['artist'] = mrr_contents_text
3133 elif mrr_title == 'Song':
3134 info['track'] = mrr_contents_text
3135
3136 fallbacks = {
3137 'channel': 'uploader',
3138 'channel_id': 'uploader_id',
3139 'channel_url': 'uploader_url',
3140 }
3141 for to, frm in fallbacks.items():
3142 if not info.get(to):
3143 info[to] = info.get(frm)
3144
3145 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3146 v = info.get(s_k)
3147 if v:
3148 info[d_k] = v
3149
3150 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3151 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3152 is_membersonly = None
3153 is_premium = None
3154 if initial_data and is_private is not None:
3155 is_membersonly = False
3156 is_premium = False
3157 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3158 badge_labels = set()
3159 for content in contents:
3160 if not isinstance(content, dict):
3161 continue
3162 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3163 for badge_label in badge_labels:
3164 if badge_label.lower() == 'members only':
3165 is_membersonly = True
3166 elif badge_label.lower() == 'premium':
3167 is_premium = True
3168 elif badge_label.lower() == 'unlisted':
3169 is_unlisted = True
3170
3171 info['availability'] = self._availability(
3172 is_private=is_private,
3173 needs_premium=is_premium,
3174 needs_subscription=is_membersonly,
3175 needs_auth=info['age_limit'] >= 18,
3176 is_unlisted=None if is_private is None else is_unlisted)
3177
3178 if self.get_param('getcomments', False):
3179 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3180
3181 self.mark_watched(video_id, player_responses)
3182
3183 return info
3184
3185
3186 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3187 IE_DESC = 'YouTube.com tab'
3188 _VALID_URL = r'''(?x)
3189 https?://
3190 (?:\w+\.)?
3191 (?:
3192 youtube(?:kids)?\.com|
3193 invidio\.us
3194 )/
3195 (?:
3196 (?P<channel_type>channel|c|user|browse)/|
3197 (?P<not_channel>
3198 feed/|hashtag/|
3199 (?:playlist|watch)\?.*?\blist=
3200 )|
3201 (?!(?:%s)\b) # Direct URLs
3202 )
3203 (?P<id>[^/?\#&]+)
3204 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3205 IE_NAME = 'youtube:tab'
3206
3207 _TESTS = [{
3208 'note': 'playlists, multipage',
3209 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3210 'playlist_mincount': 94,
3211 'info_dict': {
3212 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3213 'title': 'Игорь Клейнер - Playlists',
3214 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3215 'uploader': 'Игорь Клейнер',
3216 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3217 },
3218 }, {
3219 'note': 'playlists, multipage, different order',
3220 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3221 'playlist_mincount': 94,
3222 'info_dict': {
3223 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3224 'title': 'Игорь Клейнер - Playlists',
3225 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3226 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3227 'uploader': 'Игорь Клейнер',
3228 },
3229 }, {
3230 'note': 'playlists, series',
3231 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3232 'playlist_mincount': 5,
3233 'info_dict': {
3234 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3235 'title': '3Blue1Brown - Playlists',
3236 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3237 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3238 'uploader': '3Blue1Brown',
3239 },
3240 }, {
3241 'note': 'playlists, singlepage',
3242 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3243 'playlist_mincount': 4,
3244 'info_dict': {
3245 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3246 'title': 'ThirstForScience - Playlists',
3247 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3248 'uploader': 'ThirstForScience',
3249 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3250 }
3251 }, {
3252 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3253 'only_matching': True,
3254 }, {
3255 'note': 'basic, single video playlist',
3256 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3257 'info_dict': {
3258 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3259 'uploader': 'Sergey M.',
3260 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3261 'title': 'youtube-dl public playlist',
3262 },
3263 'playlist_count': 1,
3264 }, {
3265 'note': 'empty playlist',
3266 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3267 'info_dict': {
3268 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3269 'uploader': 'Sergey M.',
3270 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3271 'title': 'youtube-dl empty playlist',
3272 },
3273 'playlist_count': 0,
3274 }, {
3275 'note': 'Home tab',
3276 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3277 'info_dict': {
3278 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3279 'title': 'lex will - Home',
3280 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3281 'uploader': 'lex will',
3282 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3283 },
3284 'playlist_mincount': 2,
3285 }, {
3286 'note': 'Videos tab',
3287 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3288 'info_dict': {
3289 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3290 'title': 'lex will - Videos',
3291 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3292 'uploader': 'lex will',
3293 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3294 },
3295 'playlist_mincount': 975,
3296 }, {
3297 'note': 'Videos tab, sorted by popular',
3298 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3299 'info_dict': {
3300 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3301 'title': 'lex will - Videos',
3302 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3303 'uploader': 'lex will',
3304 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3305 },
3306 'playlist_mincount': 199,
3307 }, {
3308 'note': 'Playlists tab',
3309 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3310 'info_dict': {
3311 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3312 'title': 'lex will - Playlists',
3313 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3314 'uploader': 'lex will',
3315 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3316 },
3317 'playlist_mincount': 17,
3318 }, {
3319 'note': 'Community tab',
3320 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3321 'info_dict': {
3322 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3323 'title': 'lex will - Community',
3324 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3325 'uploader': 'lex will',
3326 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3327 },
3328 'playlist_mincount': 18,
3329 }, {
3330 'note': 'Channels tab',
3331 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3332 'info_dict': {
3333 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3334 'title': 'lex will - Channels',
3335 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3336 'uploader': 'lex will',
3337 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3338 },
3339 'playlist_mincount': 12,
3340 }, {
3341 'note': 'Search tab',
3342 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3343 'playlist_mincount': 40,
3344 'info_dict': {
3345 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3346 'title': '3Blue1Brown - Search - linear algebra',
3347 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3348 'uploader': '3Blue1Brown',
3349 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3350 },
3351 }, {
3352 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3353 'only_matching': True,
3354 }, {
3355 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3356 'only_matching': True,
3357 }, {
3358 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3359 'only_matching': True,
3360 }, {
3361 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3362 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3363 'info_dict': {
3364 'title': '29C3: Not my department',
3365 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3366 'uploader': 'Christiaan008',
3367 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3368 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3369 },
3370 'playlist_count': 96,
3371 }, {
3372 'note': 'Large playlist',
3373 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3374 'info_dict': {
3375 'title': 'Uploads from Cauchemar',
3376 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3377 'uploader': 'Cauchemar',
3378 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3379 },
3380 'playlist_mincount': 1123,
3381 }, {
3382 'note': 'even larger playlist, 8832 videos',
3383 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3384 'only_matching': True,
3385 }, {
3386 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3387 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3388 'info_dict': {
3389 'title': 'Uploads from Interstellar Movie',
3390 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3391 'uploader': 'Interstellar Movie',
3392 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3393 },
3394 'playlist_mincount': 21,
3395 }, {
3396 'note': 'Playlist with "show unavailable videos" button',
3397 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3398 'info_dict': {
3399 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3400 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3401 'uploader': 'Phim Siêu Nhân Nhật Bản',
3402 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3403 },
3404 'playlist_mincount': 200,
3405 }, {
3406 'note': 'Playlist with unavailable videos in page 7',
3407 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3408 'info_dict': {
3409 'title': 'Uploads from BlankTV',
3410 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3411 'uploader': 'BlankTV',
3412 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3413 },
3414 'playlist_mincount': 1000,
3415 }, {
3416 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3417 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3418 'info_dict': {
3419 'title': 'Data Analysis with Dr Mike Pound',
3420 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3421 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3422 'uploader': 'Computerphile',
3423 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3424 },
3425 'playlist_mincount': 11,
3426 }, {
3427 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3428 'only_matching': True,
3429 }, {
3430 'note': 'Playlist URL that does not actually serve a playlist',
3431 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3432 'info_dict': {
3433 'id': 'FqZTN594JQw',
3434 'ext': 'webm',
3435 'title': "Smiley's People 01 detective, Adventure Series, Action",
3436 'uploader': 'STREEM',
3437 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3438 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3439 'upload_date': '20150526',
3440 'license': 'Standard YouTube License',
3441 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3442 'categories': ['People & Blogs'],
3443 'tags': list,
3444 'view_count': int,
3445 'like_count': int,
3446 'dislike_count': int,
3447 },
3448 'params': {
3449 'skip_download': True,
3450 },
3451 'skip': 'This video is not available.',
3452 'add_ie': [YoutubeIE.ie_key()],
3453 }, {
3454 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3455 'only_matching': True,
3456 }, {
3457 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3458 'only_matching': True,
3459 }, {
3460 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3461 'info_dict': {
3462 'id': '3yImotZU3tw', # This will keep changing
3463 'ext': 'mp4',
3464 'title': compat_str,
3465 'uploader': 'Sky News',
3466 'uploader_id': 'skynews',
3467 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3468 'upload_date': r're:\d{8}',
3469 'description': compat_str,
3470 'categories': ['News & Politics'],
3471 'tags': list,
3472 'like_count': int,
3473 'dislike_count': int,
3474 },
3475 'params': {
3476 'skip_download': True,
3477 },
3478 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3479 }, {
3480 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3481 'info_dict': {
3482 'id': 'a48o2S1cPoo',
3483 'ext': 'mp4',
3484 'title': 'The Young Turks - Live Main Show',
3485 'uploader': 'The Young Turks',
3486 'uploader_id': 'TheYoungTurks',
3487 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3488 'upload_date': '20150715',
3489 'license': 'Standard YouTube License',
3490 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3491 'categories': ['News & Politics'],
3492 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3493 'like_count': int,
3494 'dislike_count': int,
3495 },
3496 'params': {
3497 'skip_download': True,
3498 },
3499 'only_matching': True,
3500 }, {
3501 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3502 'only_matching': True,
3503 }, {
3504 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3505 'only_matching': True,
3506 }, {
3507 'note': 'A channel that is not live. Should raise error',
3508 'url': 'https://www.youtube.com/user/numberphile/live',
3509 'only_matching': True,
3510 }, {
3511 'url': 'https://www.youtube.com/feed/trending',
3512 'only_matching': True,
3513 }, {
3514 'url': 'https://www.youtube.com/feed/library',
3515 'only_matching': True,
3516 }, {
3517 'url': 'https://www.youtube.com/feed/history',
3518 'only_matching': True,
3519 }, {
3520 'url': 'https://www.youtube.com/feed/subscriptions',
3521 'only_matching': True,
3522 }, {
3523 'url': 'https://www.youtube.com/feed/watch_later',
3524 'only_matching': True,
3525 }, {
3526 'note': 'Recommended - redirects to home page',
3527 'url': 'https://www.youtube.com/feed/recommended',
3528 'only_matching': True,
3529 }, {
3530 'note': 'inline playlist with not always working continuations',
3531 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3532 'only_matching': True,
3533 }, {
3534 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3535 'only_matching': True,
3536 }, {
3537 'url': 'https://www.youtube.com/course',
3538 'only_matching': True,
3539 }, {
3540 'url': 'https://www.youtube.com/zsecurity',
3541 'only_matching': True,
3542 }, {
3543 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3544 'only_matching': True,
3545 }, {
3546 'url': 'https://www.youtube.com/TheYoungTurks/live',
3547 'only_matching': True,
3548 }, {
3549 'url': 'https://www.youtube.com/hashtag/cctv9',
3550 'info_dict': {
3551 'id': 'cctv9',
3552 'title': '#cctv9',
3553 },
3554 'playlist_mincount': 350,
3555 }, {
3556 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3557 'only_matching': True,
3558 }, {
3559 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3560 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3561 'only_matching': True
3562 }, {
3563 'note': '/browse/ should redirect to /channel/',
3564 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3565 'only_matching': True
3566 }, {
3567 'note': 'VLPL, should redirect to playlist?list=PL...',
3568 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3569 'info_dict': {
3570 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3571 'uploader': 'NoCopyrightSounds',
3572 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3573 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3574 'title': 'NCS Releases',
3575 },
3576 'playlist_mincount': 166,
3577 }, {
3578 'note': 'Topic, should redirect to playlist?list=UU...',
3579 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3580 'info_dict': {
3581 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3582 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3583 'title': 'Uploads from Royalty Free Music - Topic',
3584 'uploader': 'Royalty Free Music - Topic',
3585 },
3586 'expected_warnings': [
3587 'A channel/user page was given',
3588 'The URL does not have a videos tab',
3589 ],
3590 'playlist_mincount': 101,
3591 }, {
3592 'note': 'Topic without a UU playlist',
3593 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3594 'info_dict': {
3595 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3596 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3597 },
3598 'expected_warnings': [
3599 'A channel/user page was given',
3600 'The URL does not have a videos tab',
3601 'Falling back to channel URL',
3602 ],
3603 'playlist_mincount': 9,
3604 }, {
3605 'note': 'Youtube music Album',
3606 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3607 'info_dict': {
3608 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3609 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3610 },
3611 'playlist_count': 50,
3612 }, {
3613 'note': 'unlisted single video playlist',
3614 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3615 'info_dict': {
3616 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3617 'uploader': 'colethedj',
3618 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3619 'title': 'yt-dlp unlisted playlist test',
3620 'availability': 'unlisted'
3621 },
3622 'playlist_count': 1,
3623 }]
3624
@classmethod
def suitable(cls, url):
    """Refuse URLs that belong to YoutubeIE (single videos); otherwise defer to the base class check."""
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
3629
def _extract_channel_id(self, webpage):
    """Extract the channel id (UC...) from a channel webpage.

    Tries the ``channelId`` meta tag first; otherwise falls back to the
    canonical/social-meta channel URL and pulls the id out of its
    ``/channel/<id>`` path component.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # Bug fix: the quantifier must be inside the capture group. The old
    # pattern ([^/?#&])+ repeated the group itself, so group(1) held only
    # the LAST character of the channel id instead of the whole id.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
3642
3643 @staticmethod
3644 def _extract_basic_item_renderer(item):
3645 # Modified from _extract_grid_item_renderer
3646 known_basic_renderers = (
3647 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3648 )
3649 for key, renderer in item.items():
3650 if not isinstance(renderer, dict):
3651 continue
3652 elif key in known_basic_renderers:
3653 return renderer
3654 elif key.startswith('grid') and key.endswith('Renderer'):
3655 return renderer
3656
def _grid_entries(self, grid_renderer):
    """Yield entries for each item of a gridRenderer.

    Items are resolved in priority order: playlist, then video, then
    channel, then a generic navigation-endpoint URL handed to whichever
    sibling extractor claims it.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        # playlist
        playlist_id = renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
            continue
        # video
        video_id = renderer.get('videoId')
        if video_id:
            yield self._extract_video(renderer)
            continue
        # channel
        channel_id = renderer.get('channelId')
        if channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
            continue
        # generic endpoint URL support
        ep_url = urljoin('https://www.youtube.com/', try_get(
            renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str))
        if ep_url:
            # first suitable extractor wins; order prefers tab/playlist over video
            for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if ie.suitable(ep_url):
                    yield self.url_result(
                        ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                    break
3696
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield entries from a shelf's inner content (grid-like renderers only)."""
    content = shelf_renderer.get('content')
    if not isinstance(content, dict):
        return
    grid_like = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
    if grid_like:
        # TODO: add support for nested playlists so each shelf is processed
        # as separate playlist
        # TODO: this includes only first N items
        for entry in self._grid_entries(grid_like):
            yield entry
    if content.get('horizontalListRenderer'):
        # TODO: horizontal lists are not handled yet
        pass
3712
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a shelf's own URL (if any) and then entries from its content."""
    ep = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', ep)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
3729
def _playlist_entries(self, video_list_renderer):
    """Yield extracted videos from a playlist (or playlist-panel) video list renderer."""
    for cell in video_list_renderer['contents']:
        if not isinstance(cell, dict):
            continue
        video = cell.get('playlistVideoRenderer') or cell.get('playlistPanelVideoRenderer')
        if not isinstance(video, dict):
            continue
        if video.get('videoId'):
            yield self._extract_video(video)
3741
def _rich_entries(self, rich_grid_renderer):
    """Yield the video inside a rich-grid cell, if it carries a videoId."""
    video = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video.get('videoId'):
        yield self._extract_video(video)
3749
def _video_entry(self, video_renderer):
    """Return an extracted video dict, or None when the renderer has no videoId."""
    if video_renderer.get('videoId'):
        return self._extract_video(video_renderer)
3754
def _post_thread_entries(self, post_thread_renderer):
    """Yield entries found in a community (backstage) post.

    Covers, in order: an attached video, an attached playlist, and any
    YouTube video links embedded in the post text.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = video_renderer.get('videoId')
    if video_id:
        entry = self._extract_video(video_renderer)
        if entry:
            yield entry
    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # skip a link that merely repeats the attached video
        if video_id == ep_video_id:
            continue
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3790
def _post_thread_continuation_entries(self, post_thread_continuation):
    """Yield entries from every backstage post thread in a continuation page."""
    contents = post_thread_continuation.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        thread = content.get('backstagePostThreadRenderer')
        if not isinstance(thread, dict):
            continue
        for entry in self._post_thread_entries(thread):
            yield entry
3801
r''' # unused
def _rich_grid_entries(self, contents):
    for content in contents:
        video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
        if video_renderer:
            entry = self._video_entry(video_renderer)
            if entry:
                yield entry
'''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of *tab*, transparently following API continuations.

    First extracts entries from the renderers already present in the tab
    content, then keeps requesting continuation pages until no further
    continuation token is found in a response.
    """

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # maps renderer key -> generator of entries for it
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            # fall back to a continuation found on the section renderer itself
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # single-element list used as a mutable cell (Python 2 does not support nonlocal)
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    visitor_data = None

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # carry visitorData over to subsequent requests (reused in the headers above)
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # Legacy response shape: continuation data under 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Newer response shape: items under onResponseReceived* actions.
        # Maps the first item's renderer key -> (handler, key to wrap items under)
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # wrap the raw item list so the existing handlers can consume it
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
3926
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab marked as selected; raise if none is."""
    for tab in tabs:
        renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if renderer.get('selected') is True:
            return renderer
    raise ExtractorError('Unable to find selected tab')
3935
@classmethod
def _extract_uploader(cls, data):
    """Extract uploader name/id/url from the playlist sidebar owner renderer.

    Keys whose value could not be determined are omitted from the result.
    """
    uploader = {}
    renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if owner:
        uploader['uploader'] = owner.get('text')
        uploader['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        uploader['uploader_url'] = urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
    # drop None values so they don't clobber fields elsewhere
    return {k: v for k, v in uploader.items() if v is not None}
3950
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tab page (channel / playlist / hashtag).

    Pulls title, description, uploader and thumbnails from the channel or
    playlist metadata renderers, then yields entries from the selected tab.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    thumbnails_list = tags = []

    selected_tab = self._extract_selected_tab(tabs)
    # channel pages carry channelMetadataRenderer; playlist pages carry
    # playlistMetadataRenderer instead
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        # prefer the avatar; fall back to the sidebar playlist thumbnail
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for t in thumbnails_list:
        if not isinstance(t, dict):
            continue
        thumbnail_url = url_or_none(t.get('url'))
        if not thumbnail_url:
            continue
        thumbnails.append({
            'url': thumbnail_url,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        })
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        # hashtag pages carry the title in the hashtag header renderer
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')
    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        # playlist pages: take uploader info from the sidebar instead
        metadata.update(self._extract_uploader(data))
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    ytcfg = self.extract_ytcfg(item_id, webpage)
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id,
            self._extract_identity_token(webpage, item_id),
            self._extract_account_syncid(ytcfg, data), ytcfg),
        **metadata)
4025
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield videos of an auto-generated Mix playlist, paging via the 'next' endpoint.

    Stops when a page yields no videos, when no new videos appear after the
    last one already seen, or when the first video comes around again
    (the mix has wrapped).
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # resume right after the last video yielded from the previous page;
        # -1 + 1 == 0 when last_id is not found (first page)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4061
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Extract a playlist given its watch-page renderer.

    Everything except mix playlists is delegated to the regular tab-based
    playlist URL; mixes are resolved in place via _extract_mix_playlist.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    playlist_url = urljoin(url, try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))
    if playlist_url and playlist_url != url:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=title)

    return self.playlist_result(
        self._extract_mix_playlist(playlist, playlist_id, data, webpage),
        playlist_id=playlist_id, playlist_title=title)
4079
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    is_private = is_unlisted = None
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_dropdown_entries = try_get(
        renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for renderer_dict in privacy_dropdown_entries:
        is_selected = try_get(
            renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if not is_selected:
            continue
        # treat the selected dropdown label like a badge label
        label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
        if label:
            badge_labels.add(label.lower())
        break

    for badge_label in badge_labels:
        if badge_label == 'unlisted':
            is_unlisted = True
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
4111
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first playlist-sidebar item keyed by *info_renderer*, or None."""
    items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for item in items:
        found = try_get(item, lambda x: x[info_renderer], expected_type)
        if found:
            return found
4120
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.
    """
    browse_id = params = None
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not renderer:
        return
    # look for the menu item labelled 'show unavailable videos'
    menu_renderer = try_get(
        renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_renderer:
        if not isinstance(menu_item, dict):
            continue
        nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
        if not text or text.lower() != 'show unavailable videos':
            continue
        browse_endpoint = try_get(
            nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
        browse_id = browse_endpoint.get('browseId')
        params = browse_endpoint.get('params')
        break

    ytcfg = self.extract_ytcfg(item_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    # fall back to the generic 'include unavailable' params and the VL-prefixed
    # browse id when the menu did not supply them
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
4159
4160 def _extract_webpage(self, url, item_id):
4161 retries = self.get_param('extractor_retries', 3)
4162 count = -1
4163 last_error = 'Incomplete yt initial data recieved'
4164 while count < retries:
4165 count += 1
4166 # Sometimes youtube returns a webpage with incomplete ytInitialData
4167 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4168 if count:
4169 self.report_warning('%s. Retrying ...' % last_error)
4170 webpage = self._download_webpage(
4171 url, item_id,
4172 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4173 data = self.extract_yt_initial_data(item_id, webpage)
4174 if data.get('contents') or data.get('currentVideoEndpoint'):
4175 break
4176 # Extract alerts here only when there is error
4177 self._extract_and_report_alerts(data)
4178 if count >= retries:
4179 raise ExtractorError(last_error)
4180 return webpage, data
4181
4182 @staticmethod
4183 def _smuggle_data(entries, data):
4184 for entry in entries:
4185 if data:
4186 entry['url'] = smuggle_url(entry['url'], data)
4187 yield entry
4188
4189 def _real_extract(self, url):
4190 url, smuggled_data = unsmuggle_url(url, {})
4191 if self.is_music_url(url):
4192 smuggled_data['is_music_url'] = True
4193 info_dict = self.__real_extract(url, smuggled_data)
4194 if info_dict.get('entries'):
4195 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4196 return info_dict
4197
    # Splits a matching URL into the part before the tab, the '/tab' component
    # itself (only when the conditional 'channel_type' group matched) and the rest
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4199
    def __real_extract(self, url, smuggled_data):
        """Resolve a tab/playlist/watch URL and dispatch to the matching extractor path.

        Normalizes the host to www.youtube.com, rewrites music channel/browse
        variants to their playlist equivalents, downloads the page, then tries
        (in order): channel tabs, an embedded watch-next playlist, a bare video.
        """
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # groupdict with None values replaced by '' so lookups never need guards
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data, only_once=True)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4314
4315
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Defer to YoutubeTabIE for tab URLs and bail out when a video id is present."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Normalize to a canonical /playlist URL and hand off to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            # Smuggled flag tells YoutubeTabIE this came from a music.youtube.com URL
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4400
4401
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    # Matches youtu.be short links that also carry a playlist id in 'list='
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Rebuild a canonical watch URL carrying both ids and let YoutubeTabIE
        # choose between the single video and the playlist
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        playlist_id = mobj.group('playlist_id')
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', {
                'v': video_id,
                'list': playlist_id,
                'feature': 'youtu.be',
            }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4440
4441
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Delegate to the tab extractor via the canonical /user/ URL
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4455
4456
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos live in the special 'LL' playlist; hand off to the tab extractor
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
4474
4475
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra protobuf search parameter; subclasses may override (e.g. date ordering)
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* video results for *query*, paging via API continuations."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page nests results differently from continuation responses
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token found on this page: end of results
            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
4543
4544
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Search parameter selecting upload-date ordering (see IE_DESC above)
    _SEARCH_PARAMS = 'CAI%3D'
4550
4551
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Unlike the keyword-based parent, this variant matches real result URLs
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search taken from a /results URL's query string."""
        params = parse_qs(url)
        query = (params.get('search_query') or params.get('q'))[0]
        # 'sp' carries the search filters/ordering, same slot as _SEARCH_PARAMS
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4578
4579
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derived per feed, e.g. 'youtube:recommended'
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # Every feed lives under /feed/<name>; the tab extractor does the real work
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4596
4597
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # 'WL' is the user's watch-later playlist; hand off to the tab extractor
        return self.url_result(
            'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
4610
4611
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches the bare youtube.com homepage
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False  # overrides the base class's _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4627
4628
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    # Resolved by the base class to https://www.youtube.com/feed/subscriptions
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4640
4641
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    # Resolved by the base class to https://www.youtube.com/feed/history
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4650
4651
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches watch/attribution URLs that carry query parameters but no video id -
    # the typical result of an unquoted '&' eating the rest of the URL in a shell
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always fails - the whole point is to give an actionable hint
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
4699
4700
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # Matches watch URLs whose video id is shorter than the standard 11 characters
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Fail with an explanation instead of querying YouTube with a bad id
        video_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
            expected=True)