# yt-dlp -- yt_dlp/extractor/youtube.py
# (git-blame web page header removed during cleanup)
# coding: utf-8

from __future__ import unicode_literals

# Standard library
import base64
import calendar
import copy
import datetime
import hashlib
import itertools
import json
import os.path
import random
import re
import time
import traceback

from .common import InfoExtractor, SearchInfoExtractor
# Python 2/3 compatibility aliases
from ..compat import (
    compat_chr,
    compat_HTTPError,
    compat_parse_qs,
    compat_str,
    compat_urllib_parse_unquote_plus,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urlparse,
)
from ..jsinterp import JSInterpreter
from ..utils import (
    bytes_to_intlist,
    clean_html,
    datetime_from_str,
    dict_get,
    error_to_compat_str,
    ExtractorError,
    float_or_none,
    format_field,
    int_or_none,
    intlist_to_bytes,
    is_html,
    mimetype2ext,
    network_exceptions,
    orderedSet,
    parse_codecs,
    parse_count,
    parse_duration,
    parse_iso8601,
    parse_qs,
    qualities,
    remove_start,
    smuggle_url,
    str_or_none,
    str_to_int,
    traverse_obj,
    try_get,
    unescapeHTML,
    unified_strdate,
    unsmuggle_url,
    update_url_query,
    url_or_none,
    urljoin,
    variadic,
)
65
66
# Known innertube API clients and their request parameters.
# Completed with defaults (API key, host, 'hl') and '*_agegate' variants
# by build_innertube_clients() below.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
    },
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
205
206
def build_innertube_clients():
    """Normalize INNERTUBE_CLIENTS in place: fill in default API key, host and
    language, compute client priorities and derive '<client>_agegate' variants
    for the four base clients."""
    third_party = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'ios', 'mweb')
    priority = qualities(base_clients[::-1])

    # Iterate over a snapshot of the keys so the agegate entries added below
    # are not revisited
    for client in list(INNERTUBE_CLIENTS):
        cfg = INNERTUBE_CLIENTS[client]
        cfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        cfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        # Base priority comes from the client family (part before the first '_')
        cfg['priority'] = 10 * priority(client.split('_', 1)[0])

        if client in base_clients:
            # Clone the base client into an age-gate bypass variant
            agegate = copy.deepcopy(cfg)
            agegate['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            agegate['priority'] -= 1
            INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate
        elif client.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3


build_innertube_clients()
233
234
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # URL path segments reserved by YouTube itself — presumably used to avoid
    # matching them as channel/user names; confirm against the URL regexes below
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Playlist IDs: prefixed alphanumeric IDs (PL…, UU…, OLAK5uy_…) or the
    # special auto-generated list names (RDMM, WL, LL, LM)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    r''' # Unused since login is broken
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
    '''
259
    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """

        def warn(message):
            self.report_warning(message)

        # username+password login is broken
        if (self._LOGIN_REQUIRED
                and self.get_param('cookiefile') is None
                and self.get_param('cookiesfrombrowser') is None):
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')
        username, password = self._get_login_info()
        if username:
            warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
        return

        # Everything below this is broken!
        r'''
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True
        '''
458
459 def _initialize_consent(self):
460 cookies = self._get_cookies('https://www.youtube.com/')
461 if cookies.get('__Secure-3PSID'):
462 return
463 consent_id = None
464 consent = cookies.get('CONSENT')
465 if consent:
466 if 'YES' in consent.value:
467 return
468 consent_id = self._search_regex(
469 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
470 if not consent_id:
471 consent_id = random.randint(100, 999)
472 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
473
474 def _real_initialize(self):
475 self._initialize_consent()
476 if self._downloader is None:
477 return
478 if not self._login():
479 return
480
    # Inline JSON assignments embedded in watch-page HTML
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Tokens that can terminate the inline JSON blob
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
484
485 def _get_default_ytcfg(self, client='web'):
486 return copy.deepcopy(INNERTUBE_CLIENTS[client])
487
488 def _get_innertube_host(self, client='web'):
489 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
490
491 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
492 # try_get but with fallback to default ytcfg client values when present
493 _func = lambda y: try_get(y, getter, expected_type)
494 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
495
    def _extract_client_name(self, ytcfg, default_client='web'):
        # Client name from ytcfg, falling back to the built-in default config
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
500
501 @staticmethod
502 def _extract_session_index(*data):
503 for ytcfg in data:
504 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
505 if session_index is not None:
506 return session_index
507
    def _extract_client_version(self, ytcfg, default_client='web'):
        # Client version from ytcfg, falling back to the built-in default config
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
512
513 def _extract_api_key(self, ytcfg=None, default_client='web'):
514 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
515
516 def _extract_context(self, ytcfg=None, default_client='web'):
517 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
518 context = _get_context(ytcfg)
519 if context:
520 return context
521
522 context = _get_context(self._get_default_ytcfg(default_client))
523 if not ytcfg:
524 return context
525
526 # Recreate the client context (required)
527 context['client'].update({
528 'clientVersion': self._extract_client_version(ytcfg, default_client),
529 'clientName': self._extract_client_name(ytcfg, default_client),
530 })
531 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
532 if visitor_data:
533 context['client']['visitorData'] = visitor_data
534 return context
535
    # Cached SAPISID cookie value: None = not looked up yet, False = unavailable
    _SAPISID = None

    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """Return a 'SAPISIDHASH <time>_<sha1>' Authorization header value,
        or None when no SAPISID/__Secure-3PAPISID cookie is available."""
        time_now = round(time.time())
        if self._SAPISID is None:
            yt_cookies = self._get_cookies('https://www.youtube.com')
            # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
            # See: https://github.com/yt-dlp/yt-dlp/issues/393
            sapisid_cookie = dict_get(
                yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
            if sapisid_cookie and sapisid_cookie.value:
                self._SAPISID = sapisid_cookie.value
                self.write_debug('Extracted SAPISID cookie')
                # SAPISID cookie is required if not already present
                if not yt_cookies.get('SAPISID'):
                    self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                    self._set_cookie(
                        '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
            else:
                self._SAPISID = False
        if not self._SAPISID:
            return None
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'
562
563 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
564 note='Downloading API JSON', errnote='Unable to download API page',
565 context=None, api_key=None, api_hostname=None, default_client='web'):
566
567 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
568 data.update(query)
569 real_headers = self.generate_api_headers(default_client=default_client)
570 real_headers.update({'content-type': 'application/json'})
571 if headers:
572 real_headers.update(headers)
573 return self._download_json(
574 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
575 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
576 data=json.dumps(data).encode('utf8'), headers=real_headers,
577 query={'key': api_key or self._extract_api_key()})
578
579 def extract_yt_initial_data(self, video_id, webpage):
580 return self._parse_json(
581 self._search_regex(
582 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
583 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
584 video_id)
585
586 def _extract_identity_token(self, webpage, item_id):
587 if not webpage:
588 return None
589 ytcfg = self.extract_ytcfg(item_id, webpage)
590 if ytcfg:
591 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
592 if token:
593 return token
594 return self._search_regex(
595 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
596 'identity token', default=None)
597
598 @staticmethod
599 def _extract_account_syncid(*args):
600 """
601 Extract syncId required to download private playlists of secondary channels
602 @params response and/or ytcfg
603 """
604 for data in args:
605 # ytcfg includes channel_syncid if on secondary channel
606 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
607 if delegated_sid:
608 return delegated_sid
609 sync_ids = (try_get(
610 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
611 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
612 if len(sync_ids) >= 2 and sync_ids[1]:
613 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
614 # and just "user_syncid||" for primary channel. We only want the channel_syncid
615 return sync_ids[0]
616
617 def extract_ytcfg(self, video_id, webpage):
618 if not webpage:
619 return {}
620 return self._parse_json(
621 self._search_regex(
622 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
623 default='{}'), video_id, fatal=False) or {}
624
    def generate_api_headers(
            self, ytcfg=None, identity_token=None, account_syncid=None,
            visitor_data=None, api_hostname=None, default_client='web', session_index=None):
        """Build the HTTP headers for an innertube API request, including
        client identification, visitor/session data and SAPISIDHASH auth."""
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
            'Origin': origin
        }
        # Fall back to the visitorData embedded in the ytcfg context
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
        if session_index is None and ytcfg:
            session_index = self._extract_session_index(ytcfg)
        if account_syncid or session_index is not None:
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        # Authorization is only sent when a SAPISID cookie is available
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers
653
654 @staticmethod
655 def _build_api_continuation_query(continuation, ctp=None):
656 query = {
657 'continuation': continuation
658 }
659 # TODO: Inconsistency with clickTrackingParams.
660 # Currently we have a fixed ctp contained within context (from ytcfg)
661 # and a ctp in root query for continuation.
662 if ctp:
663 query['clickTracking'] = {'clickTrackingParams': ctp}
664 return query
665
666 @classmethod
667 def _extract_next_continuation_data(cls, renderer):
668 next_continuation = try_get(
669 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
670 lambda x: x['continuation']['reloadContinuationData']), dict)
671 if not next_continuation:
672 return
673 continuation = next_continuation.get('continuation')
674 if not continuation:
675 return
676 ctp = next_continuation.get('clickTrackingParams')
677 return cls._build_api_continuation_query(continuation, ctp)
678
679 @classmethod
680 def _extract_continuation_ep_data(cls, continuation_ep: dict):
681 if isinstance(continuation_ep, dict):
682 continuation = try_get(
683 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
684 if not continuation:
685 return
686 ctp = continuation_ep.get('clickTrackingParams')
687 return cls._build_api_continuation_query(continuation, ctp)
688
689 @classmethod
690 def _extract_continuation(cls, renderer):
691 next_continuation = cls._extract_next_continuation_data(renderer)
692 if next_continuation:
693 return next_continuation
694
695 contents = []
696 for key in ('contents', 'items'):
697 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
698
699 for content in contents:
700 if not isinstance(content, dict):
701 continue
702 continuation_ep = try_get(
703 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
704 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
705 dict)
706 continuation = cls._extract_continuation_ep_data(continuation_ep)
707 if continuation:
708 return continuation
709
710 @classmethod
711 def _extract_alerts(cls, data):
712 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
713 if not isinstance(alert_dict, dict):
714 continue
715 for alert in alert_dict.values():
716 alert_type = alert.get('type')
717 if not alert_type:
718 continue
719 message = cls._get_text(alert, 'text')
720 if message:
721 yield alert_type, message
722
723 def _report_alerts(self, alerts, expected=True, fatal=True):
724 errors = []
725 warnings = []
726 for alert_type, alert_message in alerts:
727 if alert_type.lower() == 'error' and fatal:
728 errors.append([alert_type, alert_message])
729 else:
730 warnings.append([alert_type, alert_message])
731
732 for alert_type, alert_message in (warnings + errors[:-1]):
733 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
734 if errors:
735 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
736
    def _extract_and_report_alerts(self, data, *args, **kwargs):
        # Convenience wrapper: extract alerts from *data* and report them
        return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
739
740 def _extract_badges(self, renderer: dict):
741 badges = set()
742 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
743 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
744 if label:
745 badges.add(label.lower())
746 return badges
747
    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """Extract human-readable text from a renderer, trying each path in
        *path_list* (None = *data* itself). Handles both 'simpleText' and
        'runs'-style text objects; *max_runs* limits how many runs are joined."""
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # Wrap single results so the loop below treats both branches alike;
                # paths containing ... or list keys already yield a list of matches
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                if not runs and isinstance(item, list):
                    # The item may itself already be a list of runs
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text
769
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """Call the innertube API with retries; returns the parsed response or
        None (when not fatal). Retries on network errors (except HTTP 403/429)
        and on responses missing all of *check_get_keys*."""
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # Surface the API's own error message when the error body is JSON
                    if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                        e.cause.seek(0)
                        yt_error = try_get(
                            self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
835
836 @staticmethod
837 def is_music_url(url):
838 return re.match(r'https?://music\.youtube\.com/', url) is not None
839
    def _extract_video(self, renderer):
        """Build a url-type info dict for a video renderer (e.g. a search or
        playlist entry), delegating full extraction to YoutubeIE."""
        video_id = renderer.get('videoId')
        title = self._get_text(renderer, 'title')
        description = self._get_text(renderer, 'descriptionSnippet')
        # Duration may live either in lengthText or in the thumbnail overlay
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
        view_count_text = self._get_text(renderer, 'viewCountText') or ''
        # Strip whitespace so locale-formatted counts like "1 234 567" parse
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))

        uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
864
865
866class YoutubeIE(YoutubeBaseInfoExtractor):
867 IE_DESC = 'YouTube.com'
868 _INVIDIOUS_SITES = (
869 # invidious-redirect websites
870 r'(?:www\.)?redirect\.invidious\.io',
871 r'(?:(?:www|dev)\.)?invidio\.us',
872 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
873 r'(?:www\.)?invidious\.pussthecat\.org',
874 r'(?:www\.)?invidious\.zee\.li',
875 r'(?:www\.)?invidious\.ethibox\.fr',
876 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
877 # youtube-dl invidious instances list
878 r'(?:(?:www|no)\.)?invidiou\.sh',
879 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
880 r'(?:www\.)?invidious\.kabi\.tk',
881 r'(?:www\.)?invidious\.mastodon\.host',
882 r'(?:www\.)?invidious\.zapashcanon\.fr',
883 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
884 r'(?:www\.)?invidious\.tinfoil-hat\.net',
885 r'(?:www\.)?invidious\.himiko\.cloud',
886 r'(?:www\.)?invidious\.reallyancient\.tech',
887 r'(?:www\.)?invidious\.tube',
888 r'(?:www\.)?invidiou\.site',
889 r'(?:www\.)?invidious\.site',
890 r'(?:www\.)?invidious\.xyz',
891 r'(?:www\.)?invidious\.nixnet\.xyz',
892 r'(?:www\.)?invidious\.048596\.xyz',
893 r'(?:www\.)?invidious\.drycat\.fr',
894 r'(?:www\.)?inv\.skyn3t\.in',
895 r'(?:www\.)?tube\.poal\.co',
896 r'(?:www\.)?tube\.connect\.cafe',
897 r'(?:www\.)?vid\.wxzm\.sx',
898 r'(?:www\.)?vid\.mint\.lgbt',
899 r'(?:www\.)?vid\.puffyan\.us',
900 r'(?:www\.)?yewtu\.be',
901 r'(?:www\.)?yt\.elukerio\.org',
902 r'(?:www\.)?yt\.lelux\.fi',
903 r'(?:www\.)?invidious\.ggc-project\.de',
904 r'(?:www\.)?yt\.maisputain\.ovh',
905 r'(?:www\.)?ytprivate\.com',
906 r'(?:www\.)?invidious\.13ad\.de',
907 r'(?:www\.)?invidious\.toot\.koeln',
908 r'(?:www\.)?invidious\.fdn\.fr',
909 r'(?:www\.)?watch\.nettohikari\.com',
910 r'(?:www\.)?invidious\.namazso\.eu',
911 r'(?:www\.)?invidious\.silkky\.cloud',
912 r'(?:www\.)?invidious\.exonip\.de',
913 r'(?:www\.)?invidious\.riverside\.rocks',
914 r'(?:www\.)?invidious\.blamefran\.net',
915 r'(?:www\.)?invidious\.moomoo\.de',
916 r'(?:www\.)?ytb\.trom\.tf',
917 r'(?:www\.)?yt\.cyberhost\.uk',
918 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
919 r'(?:www\.)?qklhadlycap4cnod\.onion',
920 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
921 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
922 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
923 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
924 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
925 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
926 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
927 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
928 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
929 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
930 )
    # Matches watch/embed/e/shorts/movie URLs on youtube.com (and its -nocookie /
    # kids variants), youtu.be short links, a number of proxy/alternative frontends,
    # and bare 11-character video IDs.  The %(invidious)s placeholder is filled from
    # _INVIDIOUS_SITES (defined above).  Verbose (?x) mode: whitespace and the
    # trailing # remarks inside the pattern are ignored by the regex engine.
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                            (?:www\.)?deturl\.com/www\.youtube\.com|
                            (?:www\.)?pwnyoutube\.com|
                            (?:www\.)?hooktube\.com|
                            (?:www\.)?yourepeat\.com|
                            tube\.majestyc\.net|
                            %(invidious)s|
                            youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e|shorts)/(?!videoseries))         # v/ or embed/ or e/ or shorts/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                                 v=
                             )
                         ))
                         |(?:
                            youtu\.be|                                        # just youtu.be/xxxx
                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                            %(invidious)s
                         )/
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     (?:\#|$)""" % {
        'invidious': '|'.join(_INVIDIOUS_SITES),
    }
    # Patterns (presumably tried in order — confirm at the call site) for extracting
    # a player identifier from the player JS URL; each exposes a named group `id`.
    _PLAYER_INFO_RE = (
        # modern /s/player/<id>/player... URLs
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
        # older ias / plasma-ias base.js URLs
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
        # legacy "vfl..." style ids
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
    )
    # Static metadata for known itags.  Keys are itag strings; values are partial
    # format dicts (ext/resolution/codecs/abr/...) presumably used to supplement
    # information missing from the player response — confirm merge semantics at
    # the call site.  '_rtmp' is a special non-itag key for RTMP streams.
    # NOTE(review): negative 'preference' values appear intended to deprioritize
    # the 3D and HLS variants during format selection.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
        '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
        '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
        '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
        '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
        '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
        '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
        '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    }
    # Subtitle formats to request, presumably passed as the `fmt` parameter of the
    # timedtext caption endpoint — confirm at the subtitle-extraction call site.
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Opt out of the generic geo-bypass machinery for this extractor.
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
1085 _TESTS = [
1086 {
1087 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1088 'info_dict': {
1089 'id': 'BaW_jenozKc',
1090 'ext': 'mp4',
1091 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1092 'uploader': 'Philipp Hagemeister',
1093 'uploader_id': 'phihag',
1094 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1095 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1096 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1097 'upload_date': '20121002',
1098 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1099 'categories': ['Science & Technology'],
1100 'tags': ['youtube-dl'],
1101 'duration': 10,
1102 'view_count': int,
1103 'like_count': int,
1104 'dislike_count': int,
1105 'start_time': 1,
1106 'end_time': 9,
1107 }
1108 },
1109 {
1110 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1111 'note': 'Embed-only video (#1746)',
1112 'info_dict': {
1113 'id': 'yZIXLfi8CZQ',
1114 'ext': 'mp4',
1115 'upload_date': '20120608',
1116 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1117 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1118 'uploader': 'SET India',
1119 'uploader_id': 'setindia',
1120 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1121 'age_limit': 18,
1122 },
1123 'skip': 'Private video',
1124 },
1125 {
1126 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1127 'note': 'Use the first video ID in the URL',
1128 'info_dict': {
1129 'id': 'BaW_jenozKc',
1130 'ext': 'mp4',
1131 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1132 'uploader': 'Philipp Hagemeister',
1133 'uploader_id': 'phihag',
1134 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1135 'upload_date': '20121002',
1136 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1137 'categories': ['Science & Technology'],
1138 'tags': ['youtube-dl'],
1139 'duration': 10,
1140 'view_count': int,
1141 'like_count': int,
1142 'dislike_count': int,
1143 },
1144 'params': {
1145 'skip_download': True,
1146 },
1147 },
1148 {
1149 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1150 'note': '256k DASH audio (format 141) via DASH manifest',
1151 'info_dict': {
1152 'id': 'a9LDPn-MO4I',
1153 'ext': 'm4a',
1154 'upload_date': '20121002',
1155 'uploader_id': '8KVIDEO',
1156 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1157 'description': '',
1158 'uploader': '8KVIDEO',
1159 'title': 'UHDTV TEST 8K VIDEO.mp4'
1160 },
1161 'params': {
1162 'youtube_include_dash_manifest': True,
1163 'format': '141',
1164 },
1165 'skip': 'format 141 not served anymore',
1166 },
1167 # DASH manifest with encrypted signature
1168 {
1169 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1170 'info_dict': {
1171 'id': 'IB3lcPjvWLA',
1172 'ext': 'm4a',
1173 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1174 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1175 'duration': 244,
1176 'uploader': 'AfrojackVEVO',
1177 'uploader_id': 'AfrojackVEVO',
1178 'upload_date': '20131011',
1179 'abr': 129.495,
1180 },
1181 'params': {
1182 'youtube_include_dash_manifest': True,
1183 'format': '141/bestaudio[ext=m4a]',
1184 },
1185 },
1186 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1187 {
1188 'note': 'Embed allowed age-gate video',
1189 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1190 'info_dict': {
1191 'id': 'HtVdAasjOgU',
1192 'ext': 'mp4',
1193 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1194 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1195 'duration': 142,
1196 'uploader': 'The Witcher',
1197 'uploader_id': 'WitcherGame',
1198 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1199 'upload_date': '20140605',
1200 'age_limit': 18,
1201 },
1202 },
1203 {
1204 'note': 'Age-gate video with embed allowed in public site',
1205 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1206 'info_dict': {
1207 'id': 'HsUATh_Nc2U',
1208 'ext': 'mp4',
1209 'title': 'Godzilla 2 (Official Video)',
1210 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1211 'upload_date': '20200408',
1212 'uploader_id': 'FlyingKitty900',
1213 'uploader': 'FlyingKitty',
1214 'age_limit': 18,
1215 },
1216 },
1217 {
1218 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1219 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1220 'info_dict': {
1221 'id': 'Tq92D6wQ1mg',
1222 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1223 'ext': 'mp4',
1224 'upload_date': '20191227',
1225 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1226 'uploader': 'Projekt Melody',
1227 'description': 'md5:17eccca93a786d51bc67646756894066',
1228 'age_limit': 18,
1229 },
1230 },
1231 {
1232 'note': 'Non-Agegated non-embeddable video',
1233 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1234 'info_dict': {
1235 'id': 'MeJVWBSsPAY',
1236 'ext': 'mp4',
1237 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1238 'uploader': 'Herr Lurik',
1239 'uploader_id': 'st3in234',
1240 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1241 'upload_date': '20130730',
1242 },
1243 },
1244 {
1245 'note': 'Non-bypassable age-gated video',
1246 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1247 'only_matching': True,
1248 },
1249 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1250 # YouTube Red ad is not captured for creator
1251 {
1252 'url': '__2ABJjxzNo',
1253 'info_dict': {
1254 'id': '__2ABJjxzNo',
1255 'ext': 'mp4',
1256 'duration': 266,
1257 'upload_date': '20100430',
1258 'uploader_id': 'deadmau5',
1259 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1260 'creator': 'deadmau5',
1261 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1262 'uploader': 'deadmau5',
1263 'title': 'Deadmau5 - Some Chords (HD)',
1264 'alt_title': 'Some Chords',
1265 },
1266 'expected_warnings': [
1267 'DASH manifest missing',
1268 ]
1269 },
1270 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1271 {
1272 'url': 'lqQg6PlCWgI',
1273 'info_dict': {
1274 'id': 'lqQg6PlCWgI',
1275 'ext': 'mp4',
1276 'duration': 6085,
1277 'upload_date': '20150827',
1278 'uploader_id': 'olympic',
1279 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1280 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1281 'uploader': 'Olympics',
1282 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1283 },
1284 'params': {
1285 'skip_download': 'requires avconv',
1286 }
1287 },
1288 # Non-square pixels
1289 {
1290 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1291 'info_dict': {
1292 'id': '_b-2C3KPAM0',
1293 'ext': 'mp4',
1294 'stretched_ratio': 16 / 9.,
1295 'duration': 85,
1296 'upload_date': '20110310',
1297 'uploader_id': 'AllenMeow',
1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1299 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1300 'uploader': '孫ᄋᄅ',
1301 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1302 },
1303 },
1304 # url_encoded_fmt_stream_map is empty string
1305 {
1306 'url': 'qEJwOuvDf7I',
1307 'info_dict': {
1308 'id': 'qEJwOuvDf7I',
1309 'ext': 'webm',
1310 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1311 'description': '',
1312 'upload_date': '20150404',
1313 'uploader_id': 'spbelect',
1314 'uploader': 'Наблюдатели Петербурга',
1315 },
1316 'params': {
1317 'skip_download': 'requires avconv',
1318 },
1319 'skip': 'This live event has ended.',
1320 },
1321 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1322 {
1323 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1324 'info_dict': {
1325 'id': 'FIl7x6_3R5Y',
1326 'ext': 'webm',
1327 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1328 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1329 'duration': 220,
1330 'upload_date': '20150625',
1331 'uploader_id': 'dorappi2000',
1332 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1333 'uploader': 'dorappi2000',
1334 'formats': 'mincount:31',
1335 },
1336 'skip': 'not actual anymore',
1337 },
1338 # DASH manifest with segment_list
1339 {
1340 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1341 'md5': '8ce563a1d667b599d21064e982ab9e31',
1342 'info_dict': {
1343 'id': 'CsmdDsKjzN8',
1344 'ext': 'mp4',
1345 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1346 'uploader': 'Airtek',
1347 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1348 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1349 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1350 },
1351 'params': {
1352 'youtube_include_dash_manifest': True,
1353 'format': '135', # bestvideo
1354 },
1355 'skip': 'This live event has ended.',
1356 },
1357 {
1358 # Multifeed videos (multiple cameras), URL is for Main Camera
1359 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1360 'info_dict': {
1361 'id': 'jvGDaLqkpTg',
1362 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1363 'description': 'md5:e03b909557865076822aa169218d6a5d',
1364 },
1365 'playlist': [{
1366 'info_dict': {
1367 'id': 'jvGDaLqkpTg',
1368 'ext': 'mp4',
1369 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1370 'description': 'md5:e03b909557865076822aa169218d6a5d',
1371 'duration': 10643,
1372 'upload_date': '20161111',
1373 'uploader': 'Team PGP',
1374 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1375 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1376 },
1377 }, {
1378 'info_dict': {
1379 'id': '3AKt1R1aDnw',
1380 'ext': 'mp4',
1381 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1382 'description': 'md5:e03b909557865076822aa169218d6a5d',
1383 'duration': 10991,
1384 'upload_date': '20161111',
1385 'uploader': 'Team PGP',
1386 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1387 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1388 },
1389 }, {
1390 'info_dict': {
1391 'id': 'RtAMM00gpVc',
1392 'ext': 'mp4',
1393 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1394 'description': 'md5:e03b909557865076822aa169218d6a5d',
1395 'duration': 10995,
1396 'upload_date': '20161111',
1397 'uploader': 'Team PGP',
1398 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1399 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1400 },
1401 }, {
1402 'info_dict': {
1403 'id': '6N2fdlP3C5U',
1404 'ext': 'mp4',
1405 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1406 'description': 'md5:e03b909557865076822aa169218d6a5d',
1407 'duration': 10990,
1408 'upload_date': '20161111',
1409 'uploader': 'Team PGP',
1410 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1411 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1412 },
1413 }],
1414 'params': {
1415 'skip_download': True,
1416 },
1417 'skip': 'Not multifeed anymore',
1418 },
1419 {
1420 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1421 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1422 'info_dict': {
1423 'id': 'gVfLd0zydlo',
1424 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1425 },
1426 'playlist_count': 2,
1427 'skip': 'Not multifeed anymore',
1428 },
1429 {
1430 'url': 'https://vid.plus/FlRa-iH7PGw',
1431 'only_matching': True,
1432 },
1433 {
1434 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1435 'only_matching': True,
1436 },
1437 {
1438 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1439 # Also tests cut-off URL expansion in video description (see
1440 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1441 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1442 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1443 'info_dict': {
1444 'id': 'lsguqyKfVQg',
1445 'ext': 'mp4',
1446 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1447 'alt_title': 'Dark Walk',
1448 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1449 'duration': 133,
1450 'upload_date': '20151119',
1451 'uploader_id': 'IronSoulElf',
1452 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1453 'uploader': 'IronSoulElf',
1454 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1455 'track': 'Dark Walk',
1456 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1457 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1458 },
1459 'params': {
1460 'skip_download': True,
1461 },
1462 },
1463 {
1464 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1465 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1466 'only_matching': True,
1467 },
1468 {
1469 # Video with yt:stretch=17:0
1470 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1471 'info_dict': {
1472 'id': 'Q39EVAstoRM',
1473 'ext': 'mp4',
1474 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1475 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1476 'upload_date': '20151107',
1477 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1478 'uploader': 'CH GAMER DROID',
1479 },
1480 'params': {
1481 'skip_download': True,
1482 },
1483 'skip': 'This video does not exist.',
1484 },
1485 {
1486 # Video with incomplete 'yt:stretch=16:'
1487 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1488 'only_matching': True,
1489 },
1490 {
1491 # Video licensed under Creative Commons
1492 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1493 'info_dict': {
1494 'id': 'M4gD1WSo5mA',
1495 'ext': 'mp4',
1496 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1497 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1498 'duration': 721,
1499 'upload_date': '20150127',
1500 'uploader_id': 'BerkmanCenter',
1501 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1502 'uploader': 'The Berkman Klein Center for Internet & Society',
1503 'license': 'Creative Commons Attribution license (reuse allowed)',
1504 },
1505 'params': {
1506 'skip_download': True,
1507 },
1508 },
1509 {
1510 # Channel-like uploader_url
1511 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1512 'info_dict': {
1513 'id': 'eQcmzGIKrzg',
1514 'ext': 'mp4',
1515 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1516 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1517 'duration': 4060,
1518 'upload_date': '20151119',
1519 'uploader': 'Bernie Sanders',
1520 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1521 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1522 'license': 'Creative Commons Attribution license (reuse allowed)',
1523 },
1524 'params': {
1525 'skip_download': True,
1526 },
1527 },
1528 {
1529 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1530 'only_matching': True,
1531 },
1532 {
1533 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1534 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1535 'only_matching': True,
1536 },
1537 {
1538 # Rental video preview
1539 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1540 'info_dict': {
1541 'id': 'uGpuVWrhIzE',
1542 'ext': 'mp4',
1543 'title': 'Piku - Trailer',
1544 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1545 'upload_date': '20150811',
1546 'uploader': 'FlixMatrix',
1547 'uploader_id': 'FlixMatrixKaravan',
1548 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1549 'license': 'Standard YouTube License',
1550 },
1551 'params': {
1552 'skip_download': True,
1553 },
1554 'skip': 'This video is not available.',
1555 },
1556 {
1557 # YouTube Red video with episode data
1558 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1559 'info_dict': {
1560 'id': 'iqKdEhx-dD4',
1561 'ext': 'mp4',
1562 'title': 'Isolation - Mind Field (Ep 1)',
1563 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1564 'duration': 2085,
1565 'upload_date': '20170118',
1566 'uploader': 'Vsauce',
1567 'uploader_id': 'Vsauce',
1568 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1569 'series': 'Mind Field',
1570 'season_number': 1,
1571 'episode_number': 1,
1572 },
1573 'params': {
1574 'skip_download': True,
1575 },
1576 'expected_warnings': [
1577 'Skipping DASH manifest',
1578 ],
1579 },
1580 {
1581 # The following content has been identified by the YouTube community
1582 # as inappropriate or offensive to some audiences.
1583 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1584 'info_dict': {
1585 'id': '6SJNVb0GnPI',
1586 'ext': 'mp4',
1587 'title': 'Race Differences in Intelligence',
1588 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1589 'duration': 965,
1590 'upload_date': '20140124',
1591 'uploader': 'New Century Foundation',
1592 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1593 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1594 },
1595 'params': {
1596 'skip_download': True,
1597 },
1598 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1599 },
1600 {
1601 # itag 212
1602 'url': '1t24XAntNCY',
1603 'only_matching': True,
1604 },
1605 {
1606 # geo restricted to JP
1607 'url': 'sJL6WA-aGkQ',
1608 'only_matching': True,
1609 },
1610 {
1611 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1612 'only_matching': True,
1613 },
1614 {
1615 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1616 'only_matching': True,
1617 },
1618 {
1619 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1620 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1621 'only_matching': True,
1622 },
1623 {
1624 # DRM protected
1625 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1626 'only_matching': True,
1627 },
1628 {
1629 # Video with unsupported adaptive stream type formats
1630 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1631 'info_dict': {
1632 'id': 'Z4Vy8R84T1U',
1633 'ext': 'mp4',
1634 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1635 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1636 'duration': 433,
1637 'upload_date': '20130923',
1638 'uploader': 'Amelia Putri Harwita',
1639 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1640 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1641 'formats': 'maxcount:10',
1642 },
1643 'params': {
1644 'skip_download': True,
1645 'youtube_include_dash_manifest': False,
1646 },
1647 'skip': 'not actual anymore',
1648 },
1649 {
1650 # Youtube Music Auto-generated description
1651 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1652 'info_dict': {
1653 'id': 'MgNrAu2pzNs',
1654 'ext': 'mp4',
1655 'title': 'Voyeur Girl',
1656 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1657 'upload_date': '20190312',
1658 'uploader': 'Stephen - Topic',
1659 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1660 'artist': 'Stephen',
1661 'track': 'Voyeur Girl',
1662 'album': 'it\'s too much love to know my dear',
1663 'release_date': '20190313',
1664 'release_year': 2019,
1665 },
1666 'params': {
1667 'skip_download': True,
1668 },
1669 },
1670 {
1671 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1672 'only_matching': True,
1673 },
1674 {
1675 # invalid -> valid video id redirection
1676 'url': 'DJztXj2GPfl',
1677 'info_dict': {
1678 'id': 'DJztXj2GPfk',
1679 'ext': 'mp4',
1680 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1681 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1682 'upload_date': '20090125',
1683 'uploader': 'Prochorowka',
1684 'uploader_id': 'Prochorowka',
1685 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1686 'artist': 'Panjabi MC',
1687 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1688 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1689 },
1690 'params': {
1691 'skip_download': True,
1692 },
1693 'skip': 'Video unavailable',
1694 },
1695 {
1696 # empty description results in an empty string
1697 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1698 'info_dict': {
1699 'id': 'x41yOUIvK2k',
1700 'ext': 'mp4',
1701 'title': 'IMG 3456',
1702 'description': '',
1703 'upload_date': '20170613',
1704 'uploader_id': 'ElevageOrVert',
1705 'uploader': 'ElevageOrVert',
1706 },
1707 'params': {
1708 'skip_download': True,
1709 },
1710 },
1711 {
1712 # with '};' inside yt initial data (see [1])
1713 # see [2] for an example with '};' inside ytInitialPlayerResponse
1714 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1715 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1716 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1717 'info_dict': {
1718 'id': 'CHqg6qOn4no',
1719 'ext': 'mp4',
1720 'title': 'Part 77 Sort a list of simple types in c#',
1721 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1722 'upload_date': '20130831',
1723 'uploader_id': 'kudvenkat',
1724 'uploader': 'kudvenkat',
1725 },
1726 'params': {
1727 'skip_download': True,
1728 },
1729 },
1730 {
1731 # another example of '};' in ytInitialData
1732 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1733 'only_matching': True,
1734 },
1735 {
1736 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1737 'only_matching': True,
1738 },
1739 {
1740 # https://github.com/ytdl-org/youtube-dl/pull/28094
1741 'url': 'OtqTfy26tG0',
1742 'info_dict': {
1743 'id': 'OtqTfy26tG0',
1744 'ext': 'mp4',
1745 'title': 'Burn Out',
1746 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1747 'upload_date': '20141120',
1748 'uploader': 'The Cinematic Orchestra - Topic',
1749 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1750 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1751 'artist': 'The Cinematic Orchestra',
1752 'track': 'Burn Out',
1753 'album': 'Every Day',
1754 'release_data': None,
1755 'release_year': None,
1756 },
1757 'params': {
1758 'skip_download': True,
1759 },
1760 },
1761 {
1762 # controversial video, only works with bpctr when authenticated with cookies
1763 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1764 'only_matching': True,
1765 },
1766 {
1767 # controversial video, requires bpctr/contentCheckOk
1768 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1769 'info_dict': {
1770 'id': 'SZJvDhaSDnc',
1771 'ext': 'mp4',
1772 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1773 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1774 'uploader': 'CBS This Morning',
1775 'uploader_id': 'CBSThisMorning',
1776 'upload_date': '20140716',
1777 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1778 }
1779 },
1780 {
1781 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1782 'url': 'cBvYw8_A0vQ',
1783 'info_dict': {
1784 'id': 'cBvYw8_A0vQ',
1785 'ext': 'mp4',
1786 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1787 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1788 'upload_date': '20201120',
1789 'uploader': 'Walk around Japan',
1790 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1791 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1792 },
1793 'params': {
1794 'skip_download': True,
1795 },
1796 }, {
1797 # Has multiple audio streams
1798 'url': 'WaOKSUlf4TM',
1799 'only_matching': True
1800 }, {
1801 # Requires Premium: has format 141 when requested using YTM url
1802 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1803 'only_matching': True
1804 }, {
1805 # multiple subtitles with same lang_code
1806 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1807 'only_matching': True,
1808 }, {
1809 # Force use android client fallback
1810 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1811 'info_dict': {
1812 'id': 'YOelRv7fMxY',
1813 'title': 'DIGGING A SECRET TUNNEL Part 1',
1814 'ext': '3gp',
1815 'upload_date': '20210624',
1816 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1817 'uploader': 'colinfurze',
1818 'uploader_id': 'colinfurze',
1819 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1820 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1821 },
1822 'params': {
1823 'format': '17', # 3gp format available on android
1824 'extractor_args': {'youtube': {'player_client': ['android']}},
1825 },
1826 },
1827 {
1828 # Skip download of additional client configs (remix client config in this case)
1829 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1830 'only_matching': True,
1831 'params': {
1832 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1833 },
1834 }, {
1835 # shorts
1836 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1837 'only_matching': True,
1838 },
1839 ]
1840
1841 @classmethod
1842 def suitable(cls, url):
1843 from ..utils import parse_qs
1844
1845 qs = parse_qs(url)
1846 if qs.get('list', [None])[0]:
1847 return False
1848 return super(YoutubeIE, cls).suitable(url)
1849
1850 def __init__(self, *args, **kwargs):
1851 super(YoutubeIE, self).__init__(*args, **kwargs)
1852 self._code_cache = {}
1853 self._player_cache = {}
1854
1855 def _extract_player_url(self, ytcfg=None, webpage=None):
1856 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1857 if not player_url and webpage:
1858 player_url = self._search_regex(
1859 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1860 webpage, 'player URL', fatal=False)
1861 if not player_url:
1862 return None
1863 if player_url.startswith('//'):
1864 player_url = 'https:' + player_url
1865 elif not re.match(r'https?://', player_url):
1866 player_url = compat_urlparse.urljoin(
1867 'https://www.youtube.com', player_url)
1868 return player_url
1869
1870 def _signature_cache_id(self, example_sig):
1871 """ Return a string representation of a signature """
1872 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1873
1874 @classmethod
1875 def _extract_player_info(cls, player_url):
1876 for player_re in cls._PLAYER_INFO_RE:
1877 id_m = re.search(player_re, player_url)
1878 if id_m:
1879 break
1880 else:
1881 raise ExtractorError('Cannot identify player %r' % player_url)
1882 return id_m.group('id')
1883
1884 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1885 player_id = self._extract_player_info(player_url)
1886 if player_id not in self._code_cache:
1887 self._code_cache[player_id] = self._download_webpage(
1888 player_url, video_id, fatal=fatal,
1889 note='Downloading player ' + player_id,
1890 errnote='Download of %s failed' % player_url)
1891 return player_id in self._code_cache
1892
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Build (or load from disk cache) the signature-decryption function.

        The function is persisted as a list of character indices ("spec"),
        keyed by player id plus the signature's length pattern.
        """
        player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = 'js_%s_%s' % (
            player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # cached spec: output char i comes from input position cache_spec[i]
            return lambda s: ''.join(s[i] for i in cache_spec)

        if self._load_player(video_id, player_url):
            code = self._code_cache[player_id]
            res = self._parse_sig_js(code)

            # Probe the JS function with chr(0..n-1) to learn which input
            # index lands at each output position, then persist that spec
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

            self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
            return res
1915
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Probes *func* with an index string, then compresses the resulting
        index permutation into slice expressions where runs allow it.
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # extend the current +/-1 run or flush it as a slice
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # flush the trailing element or run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1954
    def _parse_sig_js(self, jscode):
        """Find the signature-decryption function in the player JS.

        Returns a str -> str callable backed by JSInterpreter.
        Raises via _search_regex when no pattern matches.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # JSInterpreter functions take an argument list
        return lambda s: initial_function([s])
1978
1979 def _decrypt_signature(self, s, video_id, player_url):
1980 """Turn the encrypted s field into a working signature"""
1981
1982 if player_url is None:
1983 raise ExtractorError('Cannot decrypt signature without player_url')
1984
1985 try:
1986 player_id = (player_url, self._signature_cache_id(s))
1987 if player_id not in self._player_cache:
1988 func = self._extract_signature_function(
1989 video_id, player_url, s
1990 )
1991 self._player_cache[player_id] = func
1992 func = self._player_cache[player_id]
1993 if self.get_param('youtube_print_sig_code'):
1994 self._print_sig_code(func, s)
1995 return func(s)
1996 except Exception as e:
1997 tb = traceback.format_exc()
1998 raise ExtractorError(
1999 'Signature extraction failed: ' + tb, cause=e)
2000
    def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
        """
        Extract signatureTimestamp (sts)
        Required to tell API what sig/player version is in use.
        Prefers the 'STS' value in ytcfg; falls back to scraping the player JS.
        Returns None (after a warning) when unavailable and fatal is False.
        """
        sts = None
        if isinstance(ytcfg, dict):
            sts = int_or_none(ytcfg.get('STS'))

        if not sts:
            # Attempt to extract from player
            if player_url is None:
                error_msg = 'Cannot extract signature timestamp without player_url.'
                if fatal:
                    raise ExtractorError(error_msg)
                self.report_warning(error_msg)
                return
            if self._load_player(video_id, player_url, fatal=fatal):
                player_id = self._extract_player_info(player_url)
                code = self._code_cache[player_id]
                # sts appears as a 5-digit literal in the player JS
                sts = int_or_none(self._search_regex(
                    r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
                    'JS player signature timestamp', group='sts', fatal=fatal))
        return sts
2025
2026 def _mark_watched(self, video_id, player_responses):
2027 playback_url = traverse_obj(
2028 player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2029 expected_type=url_or_none, get_all=False)
2030 if not playback_url:
2031 self.report_warning('Unable to mark watched')
2032 return
2033 parsed_playback_url = compat_urlparse.urlparse(playback_url)
2034 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2035
2036 # cpn generation algorithm is reverse engineered from base.js.
2037 # In fact it works even with dummy cpn.
2038 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2039 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2040
2041 qs.update({
2042 'ver': ['2'],
2043 'cpn': [cpn],
2044 })
2045 playback_url = compat_urlparse.urlunparse(
2046 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2047
2048 self._download_webpage(
2049 playback_url, video_id, 'Marking watched',
2050 'Unable to mark watched', fatal=False)
2051
    @staticmethod
    def _extract_urls(webpage):
        """Return all embedded YouTube URLs/ids found in an arbitrary HTML page."""
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

        # lazyYT YouTube embed (bare video ids, not URLs)
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin (also bare video ids)
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        entries.extend(m[-1] for m in matches)

        return entries
2083
2084 @staticmethod
2085 def _extract_url(webpage):
2086 urls = YoutubeIE._extract_urls(webpage)
2087 return urls[0] if urls else None
2088
2089 @classmethod
2090 def extract_id(cls, url):
2091 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2092 if mobj is None:
2093 raise ExtractorError('Invalid URL: %s' % url)
2094 return mobj.group('id')
2095
2096 def _extract_chapters_from_json(self, data, duration):
2097 chapter_list = traverse_obj(
2098 data, (
2099 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2100 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2101 ), expected_type=list)
2102
2103 return self._extract_chapters(
2104 chapter_list,
2105 chapter_time=lambda chapter: float_or_none(
2106 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2107 chapter_title=lambda chapter: traverse_obj(
2108 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2109 duration=duration)
2110
    def _extract_chapters_from_engagement_panel(self, data, duration):
        """Extract chapters from the macro-markers engagement panel.

        Returns the first panel that yields a non-empty chapter list, else [].
        """
        content_list = traverse_obj(
            data,
            ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
            expected_type=list, default=[])
        chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
        chapter_title = lambda chapter: self._get_text(chapter, 'title')

        # filter(None, ...) drops empty chapter lists; next(..., []) takes the
        # first surviving one (or the [] default when none survive)
        return next((
            filter(None, (
                self._extract_chapters(
                    traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
                    chapter_time, chapter_title, duration)
                for contents in content_list
            ))), [])
2126
2127 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2128 chapters = []
2129 last_chapter = {'start_time': 0}
2130 for idx, chapter in enumerate(chapter_list or []):
2131 title = chapter_title(chapter)
2132 start_time = chapter_time(chapter)
2133 if start_time is None:
2134 continue
2135 last_chapter['end_time'] = start_time
2136 if start_time < last_chapter['start_time']:
2137 if idx == 1:
2138 chapters.pop()
2139 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2140 else:
2141 self.report_warning(f'Invalid start time for chapter "{title}"')
2142 continue
2143 last_chapter = {'start_time': start_time, 'title': title}
2144 chapters.append(last_chapter)
2145 last_chapter['end_time'] = duration
2146 return chapters
2147
    def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
        """Parse an embedded JSON blob (e.g. ytInitialData) from the page.

        Tries the regex anchored to the initial-data boundary first, then the
        bare regex; returns a parsed {} on failure (default='{}', fatal=False).
        """
        return self._parse_json(self._search_regex(
            (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
             regex), webpage, name, default='{}'), video_id, fatal=False)
2152
2153 @staticmethod
2154 def parse_time_text(time_text):
2155 """
2156 Parse the comment time text
2157 time_text is in the format 'X units ago (edited)'
2158 """
2159 time_text_split = time_text.split(' ')
2160 if len(time_text_split) >= 3:
2161 try:
2162 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2163 except ValueError:
2164 return None
2165
2166 def _extract_comment(self, comment_renderer, parent=None):
2167 comment_id = comment_renderer.get('commentId')
2168 if not comment_id:
2169 return
2170
2171 text = self._get_text(comment_renderer, 'contentText')
2172
2173 # note: timestamp is an estimate calculated from the current time and time_text
2174 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2175 time_text_dt = self.parse_time_text(time_text)
2176 if isinstance(time_text_dt, datetime.datetime):
2177 timestamp = calendar.timegm(time_text_dt.timetuple())
2178 author = self._get_text(comment_renderer, 'authorText')
2179 author_id = try_get(comment_renderer,
2180 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2181
2182 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2183 lambda x: x['likeCount']), compat_str)) or 0
2184 author_thumbnail = try_get(comment_renderer,
2185 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2186
2187 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2188 is_favorited = 'creatorHeart' in (try_get(
2189 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2190 return {
2191 'id': comment_id,
2192 'text': text,
2193 'timestamp': timestamp,
2194 'time_text': time_text,
2195 'like_count': votes,
2196 'is_favorited': is_favorited,
2197 'author': author,
2198 'author_id': author_id,
2199 'author_thumbnail': author_thumbnail,
2200 'author_is_uploader': author_is_uploader,
2201 'parent': parent or 'root'
2202 }
2203
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """Generator over a comment section: yields an int estimated total
        first (top level only), then comment dicts; recurses once into reply
        threads. comment_counts = [downloaded, estimated total, thread index].
        """

        def extract_header(contents):
            # Returns (estimated total comments, continuation for chosen sort)
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yields each thread's top comment, then recurses into its replies
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        # tokens shorter than 27 chars are from the legacy API and must be regenerated
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # carry visitorData forward so subsequent pages share the session
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                    break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2374
2375 @staticmethod
2376 def _generate_comment_continuation(video_id):
2377 """
2378 Generates initial comment section continuation token from given video id
2379 """
2380 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2381 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2382 new_continuation_intlist = list(itertools.chain.from_iterable(
2383 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2384 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2385
    def _extract_comments(self, ytcfg, video_id, contents, webpage):
        """Entry for comment extraction"""
        def _real_comment_extract(contents):
            # yields an int (estimated total) first, then comment dicts
            if isinstance(contents, list):
                for entry in contents:
                    for key, renderer in entry.items():
                        if key not in known_entry_comment_renderers:
                            continue
                        yield from self._comment_entries(
                            renderer, video_id=video_id, ytcfg=ytcfg,
                            identity_token=self._extract_identity_token(webpage, item_id=video_id),
                            account_syncid=self._extract_account_syncid(ytcfg))
                        break
        comments = []
        known_entry_comment_renderers = ('itemSectionRenderer',)
        estimated_total = 0
        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
        # Force English regardless of account setting to prevent parsing issues
        # See: https://github.com/yt-dlp/yt-dlp/issues/532
        ytcfg = copy.deepcopy(ytcfg)
        # NOTE(review): when the INNERTUBE_CONTEXT.client path is absent,
        # default={} is a throwaway dict and this 'hl' write silently does
        # nothing - presumably acceptable; confirm.
        traverse_obj(
            ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
        try:
            for comment in _real_comment_extract(contents):
                if len(comments) >= max_comments:
                    break
                # the first yielded value is the estimated total, not a comment
                if isinstance(comment, int):
                    estimated_total = comment
                    continue
                comments.append(comment)
        except KeyboardInterrupt:
            self.to_screen('Interrupted by user')
        self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
        return {
            'comments': comments,
            'comment_count': len(comments),
        }
2423
2424 @staticmethod
2425 def _generate_player_context(sts=None):
2426 context = {
2427 'html5Preference': 'HTML5_PREF_WANTS',
2428 }
2429 if sts is not None:
2430 context['signatureTimestamp'] = sts
2431 return {
2432 'playbackContext': {
2433 'contentPlaybackContext': context
2434 },
2435 'contentCheckOk': True,
2436 'racyCheckOk': True
2437 }
2438
2439 @staticmethod
2440 def _is_agegated(player_response):
2441 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2442 return True
2443
2444 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2445 AGE_GATE_REASONS = (
2446 'confirm your age', 'age-restricted', 'inappropriate', # reason
2447 'age_verification_required', 'age_check_required', # status
2448 )
2449 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2450
2451 @staticmethod
2452 def _is_unplayable(player_response):
2453 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2454
    def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
        """Call the innertube /player endpoint for the given client.

        Returns the player-response JSON dict, or None when the API
        returned nothing.
        """
        session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
        syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
        # sts ties the request to the sig/player version currently in use
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
        headers = self.generate_api_headers(
            player_ytcfg, identity_token, syncid,
            default_client=client, session_index=session_index)

        yt_query = {'videoId': video_id}
        yt_query.update(self._generate_player_context(sts))
        return self._extract_response(
            item_id=video_id, ep='player', query=yt_query,
            ytcfg=player_ytcfg, headers=headers, fatal=True,
            default_client=client,
            note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
        ) or None
2472
2473 def _get_requested_clients(self, url, smuggled_data):
2474 requested_clients = []
2475 allowed_clients = sorted(
2476 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2477 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2478 for client in self._configuration_arg('player_client'):
2479 if client in allowed_clients:
2480 requested_clients.append(client)
2481 elif client == 'all':
2482 requested_clients.extend(allowed_clients)
2483 else:
2484 self.report_warning(f'Skipping unsupported client {client}')
2485 if not requested_clients:
2486 requested_clients = ['android', 'web']
2487
2488 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2489 requested_clients.extend(
2490 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2491
2492 return orderedSet(requested_clients)
2493
2494 def _extract_player_ytcfg(self, client, video_id):
2495 url = {
2496 'web_music': 'https://music.youtube.com',
2497 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2498 }.get(client)
2499 if not url:
2500 return {}
2501 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2502 return self.extract_ytcfg(video_id, webpage) or {}
2503
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
        """Yield player API responses for each requested client.

        Extra clients (``*_agegate`` / ``*_creator`` variants) may be queued
        mid-iteration when a response indicates age-gating. API errors are
        deferred: only raised if no response at all could be yielded.
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        original_clients = clients
        clients = clients[::-1]  # reversed copy; clients are pop()ed off the end below

        def append_client(client_name):
            # Queue an additional client unless it was already explicitly requested
            if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
                clients.append(client_name)

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        yielded_pr = False
        if initial_pr:
            pr = dict(initial_pr)
            pr['streamingData'] = None  # keep metadata only; formats come from the requested clients
            yielded_pr = True
            yield pr

        last_error = None
        while clients:
            client = clients.pop()
            # The master ytcfg already matches the 'web' client; others may need their own
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip'):
                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

            try:
                # The initial player response from the webpage can be reused for 'web'
                pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
            except ExtractorError as e:
                # Defer the error: another client may still succeed
                if last_error:
                    self.report_warning(last_error)
                last_error = e
                continue

            if pr:
                yielded_pr = True
                yield pr

            # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
            if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header():
                append_client(client.replace('_agegate', '_creator'))
            elif self._is_agegated(pr):
                append_client(f'{client}_agegate')

        if last_error:
            if not yielded_pr:
                raise last_error
            self.report_warning(last_error)
2559
    def _extract_formats(self, streaming_data, video_id, player_url, is_live):
        """Yield format dicts from the streamingData of all player responses.

        First processes progressive/adaptive formats (including decryption of
        ciphered signatures), then HLS and DASH manifests unless disabled via
        extractor arguments or global options. Duplicate itags/stream ids
        across the multiple player responses are skipped.
        """
        itags, stream_ids = [], []
        itag_qualities, res_qualities = {}, {}
        q = qualities([
            # Normally tiny is the smallest video-only formats. But
            # audio-only formats with unknown quality may get tagged as tiny
            'tiny',
            'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
            'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
        ])
        streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

        for fmt in streaming_formats:
            # Skip segmented (OTF) placeholders and DRM-protected entries
            if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
                continue

            itag = str_or_none(fmt.get('itag'))
            audio_track = fmt.get('audioTrack') or {}
            # Same itag can appear once per audio track; dedupe on the pair
            stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
            if stream_id in stream_ids:
                continue

            quality = fmt.get('quality')
            height = int_or_none(fmt.get('height'))
            if quality == 'tiny' or not quality:
                quality = fmt.get('audioQuality', '').lower() or quality
            # The 3gp format (17) in android client has a quality of "small",
            # but is actually worse than other formats
            if itag == '17':
                quality = 'tiny'
            if quality:
                if itag:
                    itag_qualities[itag] = quality
                if height:
                    res_qualities[height] = quality
            # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
            # (adding `&sq=0` to the URL) and parsing emsg box to determine the
            # number of fragment that would subsequently requested with (`&sq=N`)
            if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
                continue

            fmt_url = fmt.get('url')
            if not fmt_url:
                # URL is hidden inside a signatureCipher and must be decrypted
                sc = compat_parse_qs(fmt.get('signatureCipher'))
                fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
                encrypted_sig = try_get(sc, lambda x: x['s'][0])
                if not (sc and fmt_url and encrypted_sig):
                    continue
                if not player_url:
                    # Cannot decrypt without the player JS
                    continue
                signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
                sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
                fmt_url += '&' + sp + '=' + signature

            if itag:
                itags.append(itag)
            stream_ids.append(stream_id)

            tbr = float_or_none(
                fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
            dct = {
                'asr': int_or_none(fmt.get('audioSampleRate')),
                'filesize': int_or_none(fmt.get('contentLength')),
                'format_id': itag,
                'format_note': ', '.join(filter(None, (
                    '%s%s' % (audio_track.get('displayName') or '',
                              ' (default)' if audio_track.get('audioIsDefault') else ''),
                    fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
                'fps': int_or_none(fmt.get('fps')),
                'height': height,
                'quality': q(quality),
                'tbr': tbr,
                'url': fmt_url,
                'width': int_or_none(fmt.get('width')),
                'language': audio_track.get('id', '').split('.')[0],
                'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
            }
            mime_mobj = re.match(
                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
            if mime_mobj:
                dct['ext'] = mimetype2ext(mime_mobj.group(1))
                dct.update(parse_codecs(mime_mobj.group(2)))
            no_audio = dct.get('acodec') == 'none'
            no_video = dct.get('vcodec') == 'none'
            if no_audio:
                dct['vbr'] = tbr
            if no_video:
                dct['abr'] = tbr
            if no_audio or no_video:
                dct['downloader_options'] = {
                    # Youtube throttles chunks >~10M
                    'http_chunk_size': 10485760,
                }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
            yield dct

        skip_manifests = self._configuration_arg('skip')
        get_dash = (
            (not is_live or self._configuration_arg('include_live_dash'))
            and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
        get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

        def guess_quality(f):
            # Reuse qualities learned from the progressive/adaptive formats above
            for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
                if val in qdict:
                    return q(qdict[val])
            return -1

        for sd in streaming_data:
            hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
            if hls_manifest_url:
                for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                    itag = self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)
                    if itag in itags:
                        continue
                    if itag:
                        f['format_id'] = itag
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    yield f

            dash_manifest_url = get_dash and sd.get('dashManifestUrl')
            if dash_manifest_url:
                for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                    itag = f['format_id']
                    if itag in itags:
                        continue
                    if itag:
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    filesize = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url')
                        or f['url'], 'file size', default=None))
                    if filesize:
                        f['filesize'] = filesize
                    yield f
2698
2699 def _real_extract(self, url):
2700 url, smuggled_data = unsmuggle_url(url, {})
2701 video_id = self._match_id(url)
2702
2703 base_url = self.http_scheme() + '//www.youtube.com/'
2704 webpage_url = base_url + 'watch?v=' + video_id
2705 webpage = self._download_webpage(
2706 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2707
2708 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2709 player_url = self._extract_player_url(master_ytcfg, webpage)
2710 identity_token = self._extract_identity_token(webpage, video_id)
2711
2712 player_responses = list(self._extract_player_responses(
2713 self._get_requested_clients(url, smuggled_data),
2714 video_id, webpage, master_ytcfg, player_url, identity_token))
2715
2716 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2717
2718 playability_statuses = traverse_obj(
2719 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2720
2721 trailer_video_id = get_first(
2722 playability_statuses,
2723 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2724 expected_type=str)
2725 if trailer_video_id:
2726 return self.url_result(
2727 trailer_video_id, self.ie_key(), trailer_video_id)
2728
2729 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2730 if webpage else (lambda x: None))
2731
2732 video_details = traverse_obj(
2733 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2734 microformats = traverse_obj(
2735 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2736 expected_type=dict, default=[])
2737 video_title = (
2738 get_first(video_details, 'title')
2739 or self._get_text(microformats, (..., 'title'))
2740 or search_meta(['og:title', 'twitter:title', 'title']))
2741 video_description = get_first(video_details, 'shortDescription')
2742
2743 if not smuggled_data.get('force_singlefeed', False):
2744 if not self.get_param('noplaylist'):
2745 multifeed_metadata_list = get_first(
2746 player_responses,
2747 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2748 expected_type=str)
2749 if multifeed_metadata_list:
2750 entries = []
2751 feed_ids = []
2752 for feed in multifeed_metadata_list.split(','):
2753 # Unquote should take place before split on comma (,) since textual
2754 # fields may contain comma as well (see
2755 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2756 feed_data = compat_parse_qs(
2757 compat_urllib_parse_unquote_plus(feed))
2758
2759 def feed_entry(name):
2760 return try_get(
2761 feed_data, lambda x: x[name][0], compat_str)
2762
2763 feed_id = feed_entry('id')
2764 if not feed_id:
2765 continue
2766 feed_title = feed_entry('title')
2767 title = video_title
2768 if feed_title:
2769 title += ' (%s)' % feed_title
2770 entries.append({
2771 '_type': 'url_transparent',
2772 'ie_key': 'Youtube',
2773 'url': smuggle_url(
2774 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2775 {'force_singlefeed': True}),
2776 'title': title,
2777 })
2778 feed_ids.append(feed_id)
2779 self.to_screen(
2780 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2781 % (', '.join(feed_ids), video_id))
2782 return self.playlist_result(
2783 entries, video_id, video_title, video_description)
2784 else:
2785 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2786
2787 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2788 is_live = get_first(video_details, 'isLive')
2789 if is_live is None:
2790 is_live = get_first(live_broadcast_details, 'isLiveNow')
2791
2792 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2793 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2794
2795 if not formats:
2796 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2797 self.report_drm(video_id)
2798 pemr = get_first(
2799 playability_statuses,
2800 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2801 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2802 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2803 if subreason:
2804 if subreason == 'The uploader has not made this video available in your country.':
2805 countries = get_first(microformats, 'availableCountries')
2806 if not countries:
2807 regions_allowed = search_meta('regionsAllowed')
2808 countries = regions_allowed.split(',') if regions_allowed else None
2809 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2810 reason += f'. {subreason}'
2811 if reason:
2812 self.raise_no_formats(reason, expected=True)
2813
2814 for f in formats:
2815 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
2816 f['source_preference'] = -10
2817 # TODO: this method is not reliable
2818 f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
2819
2820 # Source is given priority since formats that throttle are given lower source_preference
2821 # When throttling issue is fully fixed, remove this
2822 self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang'))
2823
2824 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2825 if not keywords and webpage:
2826 keywords = [
2827 unescapeHTML(m.group('content'))
2828 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2829 for keyword in keywords:
2830 if keyword.startswith('yt:stretch='):
2831 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2832 if mobj:
2833 # NB: float is intentional for forcing float division
2834 w, h = (float(v) for v in mobj.groups())
2835 if w > 0 and h > 0:
2836 ratio = w / h
2837 for f in formats:
2838 if f.get('vcodec') != 'none':
2839 f['stretched_ratio'] = ratio
2840 break
2841
2842 thumbnails = []
2843 thumbnail_dicts = traverse_obj(
2844 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2845 expected_type=dict, default=[])
2846 for thumbnail in thumbnail_dicts:
2847 thumbnail_url = thumbnail.get('url')
2848 if not thumbnail_url:
2849 continue
2850 # Sometimes youtube gives a wrong thumbnail URL. See:
2851 # https://github.com/yt-dlp/yt-dlp/issues/233
2852 # https://github.com/ytdl-org/youtube-dl/issues/28023
2853 if 'maxresdefault' in thumbnail_url:
2854 thumbnail_url = thumbnail_url.split('?')[0]
2855 thumbnails.append({
2856 'url': thumbnail_url,
2857 'height': int_or_none(thumbnail.get('height')),
2858 'width': int_or_none(thumbnail.get('width')),
2859 })
2860 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2861 if thumbnail_url:
2862 thumbnails.append({
2863 'url': thumbnail_url,
2864 })
2865 # The best resolution thumbnails sometimes does not appear in the webpage
2866 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2867 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2868 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2869 # TODO: Test them also? - For some videos, even these don't exist
2870 guaranteed_thumbnail_names = [
2871 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2872 'mqdefault', 'mq1', 'mq2', 'mq3',
2873 'default', '1', '2', '3'
2874 ]
2875 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2876 n_thumbnail_names = len(thumbnail_names)
2877
2878 thumbnails.extend({
2879 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2880 video_id=video_id, name=name, ext=ext,
2881 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2882 '_test_url': name in hq_thumbnail_names,
2883 } for name in thumbnail_names for ext in ('webp', 'jpg'))
2884 for thumb in thumbnails:
2885 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2886 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2887 self._remove_duplicate_formats(thumbnails)
2888
2889 category = get_first(microformats, 'category') or search_meta('genre')
2890 channel_id = str_or_none(
2891 get_first(video_details, 'channelId')
2892 or get_first(microformats, 'externalChannelId')
2893 or search_meta('channelId'))
2894 duration = int_or_none(
2895 get_first(video_details, 'lengthSeconds')
2896 or get_first(microformats, 'lengthSeconds')
2897 or parse_duration(search_meta('duration'))) or None
2898 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2899
2900 live_content = get_first(video_details, 'isLiveContent')
2901 is_upcoming = get_first(video_details, 'isUpcoming')
2902 if is_live is None:
2903 if is_upcoming or live_content is False:
2904 is_live = False
2905 if is_upcoming is None and (live_content or is_live):
2906 is_upcoming = False
2907 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2908 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2909 if not duration and live_endtime and live_starttime:
2910 duration = live_endtime - live_starttime
2911
2912 info = {
2913 'id': video_id,
2914 'title': self._live_title(video_title) if is_live else video_title,
2915 'formats': formats,
2916 'thumbnails': thumbnails,
2917 'description': video_description,
2918 'upload_date': unified_strdate(
2919 get_first(microformats, 'uploadDate')
2920 or search_meta('uploadDate')),
2921 'uploader': get_first(video_details, 'author'),
2922 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2923 'uploader_url': owner_profile_url,
2924 'channel_id': channel_id,
2925 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2926 'duration': duration,
2927 'view_count': int_or_none(
2928 get_first((video_details, microformats), (..., 'viewCount'))
2929 or search_meta('interactionCount')),
2930 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2931 'age_limit': 18 if (
2932 get_first(microformats, 'isFamilySafe') is False
2933 or search_meta('isFamilyFriendly') == 'false'
2934 or search_meta('og:restrictions:age') == '18+') else 0,
2935 'webpage_url': webpage_url,
2936 'categories': [category] if category else None,
2937 'tags': keywords,
2938 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2939 'is_live': is_live,
2940 'was_live': (False if is_live or is_upcoming or live_content is False
2941 else None if is_live is None or is_upcoming is None
2942 else live_content),
2943 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2944 'release_timestamp': live_starttime,
2945 }
2946
2947 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2948 # Converted into dicts to remove duplicates
2949 captions = {
2950 sub.get('baseUrl'): sub
2951 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2952 translation_languages = {
2953 lang.get('languageCode'): lang.get('languageName')
2954 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2955 subtitles = {}
2956 if pctr:
2957 def process_language(container, base_url, lang_code, sub_name, query):
2958 lang_subs = container.setdefault(lang_code, [])
2959 for fmt in self._SUBTITLE_FORMATS:
2960 query.update({
2961 'fmt': fmt,
2962 })
2963 lang_subs.append({
2964 'ext': fmt,
2965 'url': update_url_query(base_url, query),
2966 'name': sub_name,
2967 })
2968
2969 for base_url, caption_track in captions.items():
2970 if not base_url:
2971 continue
2972 if caption_track.get('kind') != 'asr':
2973 lang_code = (
2974 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2975 or caption_track.get('languageCode'))
2976 if not lang_code:
2977 continue
2978 process_language(
2979 subtitles, base_url, lang_code,
2980 traverse_obj(caption_track, ('name', 'simpleText'), ('name', 'runs', ..., 'text'), get_all=False),
2981 {})
2982 continue
2983 automatic_captions = {}
2984 for trans_code, trans_name in translation_languages.items():
2985 if not trans_code:
2986 continue
2987 process_language(
2988 automatic_captions, base_url, trans_code,
2989 self._get_text(trans_name, max_runs=1),
2990 {'tlang': trans_code})
2991 info['automatic_captions'] = automatic_captions
2992 info['subtitles'] = subtitles
2993
2994 parsed_url = compat_urllib_parse_urlparse(url)
2995 for component in [parsed_url.fragment, parsed_url.query]:
2996 query = compat_parse_qs(component)
2997 for k, v in query.items():
2998 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2999 d_k += '_time'
3000 if d_k not in info and k in s_ks:
3001 info[d_k] = parse_duration(query[k][0])
3002
3003 # Youtube Music Auto-generated description
3004 if video_description:
3005 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
3006 if mobj:
3007 release_year = mobj.group('release_year')
3008 release_date = mobj.group('release_date')
3009 if release_date:
3010 release_date = release_date.replace('-', '')
3011 if not release_year:
3012 release_year = release_date[:4]
3013 info.update({
3014 'album': mobj.group('album'.strip()),
3015 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3016 'track': mobj.group('track').strip(),
3017 'release_date': release_date,
3018 'release_year': int_or_none(release_year),
3019 })
3020
3021 initial_data = None
3022 if webpage:
3023 initial_data = self._extract_yt_initial_variable(
3024 webpage, self._YT_INITIAL_DATA_RE, video_id,
3025 'yt initial data')
3026 if not initial_data:
3027 headers = self.generate_api_headers(
3028 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
3029 session_index=self._extract_session_index(master_ytcfg))
3030
3031 initial_data = self._extract_response(
3032 item_id=video_id, ep='next', fatal=False,
3033 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
3034 note='Downloading initial data API JSON')
3035
3036 try:
3037 # This will error if there is no livechat
3038 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3039 info['subtitles']['live_chat'] = [{
3040 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3041 'video_id': video_id,
3042 'ext': 'json',
3043 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3044 }]
3045 except (KeyError, IndexError, TypeError):
3046 pass
3047
3048 if initial_data:
3049 info['chapters'] = (
3050 self._extract_chapters_from_json(initial_data, duration)
3051 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3052 or None)
3053
3054 contents = try_get(
3055 initial_data,
3056 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3057 list) or []
3058 for content in contents:
3059 vpir = content.get('videoPrimaryInfoRenderer')
3060 if vpir:
3061 stl = vpir.get('superTitleLink')
3062 if stl:
3063 stl = self._get_text(stl)
3064 if try_get(
3065 vpir,
3066 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3067 info['location'] = stl
3068 else:
3069 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3070 if mobj:
3071 info.update({
3072 'series': mobj.group(1),
3073 'season_number': int(mobj.group(2)),
3074 'episode_number': int(mobj.group(3)),
3075 })
3076 for tlb in (try_get(
3077 vpir,
3078 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3079 list) or []):
3080 tbr = tlb.get('toggleButtonRenderer') or {}
3081 for getter, regex in [(
3082 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3083 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3084 lambda x: x['accessibility'],
3085 lambda x: x['accessibilityData']['accessibilityData'],
3086 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3087 label = (try_get(tbr, getter, dict) or {}).get('label')
3088 if label:
3089 mobj = re.match(regex, label)
3090 if mobj:
3091 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3092 break
3093 sbr_tooltip = try_get(
3094 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3095 if sbr_tooltip:
3096 like_count, dislike_count = sbr_tooltip.split(' / ')
3097 info.update({
3098 'like_count': str_to_int(like_count),
3099 'dislike_count': str_to_int(dislike_count),
3100 })
3101 vsir = content.get('videoSecondaryInfoRenderer')
3102 if vsir:
3103 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3104 rows = try_get(
3105 vsir,
3106 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3107 list) or []
3108 multiple_songs = False
3109 for row in rows:
3110 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3111 multiple_songs = True
3112 break
3113 for row in rows:
3114 mrr = row.get('metadataRowRenderer') or {}
3115 mrr_title = mrr.get('title')
3116 if not mrr_title:
3117 continue
3118 mrr_title = self._get_text(mrr, 'title')
3119 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3120 if mrr_title == 'License':
3121 info['license'] = mrr_contents_text
3122 elif not multiple_songs:
3123 if mrr_title == 'Album':
3124 info['album'] = mrr_contents_text
3125 elif mrr_title == 'Artist':
3126 info['artist'] = mrr_contents_text
3127 elif mrr_title == 'Song':
3128 info['track'] = mrr_contents_text
3129
3130 fallbacks = {
3131 'channel': 'uploader',
3132 'channel_id': 'uploader_id',
3133 'channel_url': 'uploader_url',
3134 }
3135 for to, frm in fallbacks.items():
3136 if not info.get(to):
3137 info[to] = info.get(frm)
3138
3139 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3140 v = info.get(s_k)
3141 if v:
3142 info[d_k] = v
3143
3144 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3145 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3146 is_membersonly = None
3147 is_premium = None
3148 if initial_data and is_private is not None:
3149 is_membersonly = False
3150 is_premium = False
3151 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3152 badge_labels = set()
3153 for content in contents:
3154 if not isinstance(content, dict):
3155 continue
3156 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3157 for badge_label in badge_labels:
3158 if badge_label.lower() == 'members only':
3159 is_membersonly = True
3160 elif badge_label.lower() == 'premium':
3161 is_premium = True
3162 elif badge_label.lower() == 'unlisted':
3163 is_unlisted = True
3164
3165 info['availability'] = self._availability(
3166 is_private=is_private,
3167 needs_premium=is_premium,
3168 needs_subscription=is_membersonly,
3169 needs_auth=info['age_limit'] >= 18,
3170 is_unlisted=None if is_private is None else is_unlisted)
3171
3172 if self.get_param('getcomments', False):
3173 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3174
3175 self.mark_watched(video_id, player_responses)
3176
3177 return info
3178
3179
3180class YoutubeTabIE(YoutubeBaseInfoExtractor):
3181 IE_DESC = 'YouTube.com tab'
3182 _VALID_URL = r'''(?x)
3183 https?://
3184 (?:\w+\.)?
3185 (?:
3186 youtube(?:kids)?\.com|
3187 invidio\.us
3188 )/
3189 (?:
3190 (?P<channel_type>channel|c|user|browse)/|
3191 (?P<not_channel>
3192 feed/|hashtag/|
3193 (?:playlist|watch)\?.*?\blist=
3194 )|
3195 (?!(?:%s)\b) # Direct URLs
3196 )
3197 (?P<id>[^/?\#&]+)
3198 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3199 IE_NAME = 'youtube:tab'
3200
3201 _TESTS = [{
3202 'note': 'playlists, multipage',
3203 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3204 'playlist_mincount': 94,
3205 'info_dict': {
3206 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3207 'title': 'Игорь Клейнер - Playlists',
3208 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3209 'uploader': 'Игорь Клейнер',
3210 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3211 },
3212 }, {
3213 'note': 'playlists, multipage, different order',
3214 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3215 'playlist_mincount': 94,
3216 'info_dict': {
3217 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3218 'title': 'Игорь Клейнер - Playlists',
3219 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3220 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3221 'uploader': 'Игорь Клейнер',
3222 },
3223 }, {
3224 'note': 'playlists, series',
3225 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3226 'playlist_mincount': 5,
3227 'info_dict': {
3228 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3229 'title': '3Blue1Brown - Playlists',
3230 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3231 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3232 'uploader': '3Blue1Brown',
3233 },
3234 }, {
3235 'note': 'playlists, singlepage',
3236 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3237 'playlist_mincount': 4,
3238 'info_dict': {
3239 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3240 'title': 'ThirstForScience - Playlists',
3241 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3242 'uploader': 'ThirstForScience',
3243 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3244 }
3245 }, {
3246 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3247 'only_matching': True,
3248 }, {
3249 'note': 'basic, single video playlist',
3250 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3251 'info_dict': {
3252 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3253 'uploader': 'Sergey M.',
3254 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3255 'title': 'youtube-dl public playlist',
3256 },
3257 'playlist_count': 1,
3258 }, {
3259 'note': 'empty playlist',
3260 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3261 'info_dict': {
3262 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3263 'uploader': 'Sergey M.',
3264 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3265 'title': 'youtube-dl empty playlist',
3266 },
3267 'playlist_count': 0,
3268 }, {
3269 'note': 'Home tab',
3270 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3271 'info_dict': {
3272 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3273 'title': 'lex will - Home',
3274 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3275 'uploader': 'lex will',
3276 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3277 },
3278 'playlist_mincount': 2,
3279 }, {
3280 'note': 'Videos tab',
3281 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3282 'info_dict': {
3283 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3284 'title': 'lex will - Videos',
3285 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3286 'uploader': 'lex will',
3287 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3288 },
3289 'playlist_mincount': 975,
3290 }, {
3291 'note': 'Videos tab, sorted by popular',
3292 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3293 'info_dict': {
3294 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3295 'title': 'lex will - Videos',
3296 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3297 'uploader': 'lex will',
3298 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3299 },
3300 'playlist_mincount': 199,
3301 }, {
3302 'note': 'Playlists tab',
3303 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3304 'info_dict': {
3305 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3306 'title': 'lex will - Playlists',
3307 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3308 'uploader': 'lex will',
3309 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3310 },
3311 'playlist_mincount': 17,
3312 }, {
3313 'note': 'Community tab',
3314 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3315 'info_dict': {
3316 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3317 'title': 'lex will - Community',
3318 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3319 'uploader': 'lex will',
3320 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3321 },
3322 'playlist_mincount': 18,
3323 }, {
3324 'note': 'Channels tab',
3325 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3326 'info_dict': {
3327 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3328 'title': 'lex will - Channels',
3329 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3330 'uploader': 'lex will',
3331 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3332 },
3333 'playlist_mincount': 12,
3334 }, {
3335 'note': 'Search tab',
3336 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3337 'playlist_mincount': 40,
3338 'info_dict': {
3339 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3340 'title': '3Blue1Brown - Search - linear algebra',
3341 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3342 'uploader': '3Blue1Brown',
3343 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3344 },
3345 }, {
3346 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3347 'only_matching': True,
3348 }, {
3349 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3350 'only_matching': True,
3351 }, {
3352 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3353 'only_matching': True,
3354 }, {
3355 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3356 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3357 'info_dict': {
3358 'title': '29C3: Not my department',
3359 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3360 'uploader': 'Christiaan008',
3361 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3362 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3363 },
3364 'playlist_count': 96,
3365 }, {
3366 'note': 'Large playlist',
3367 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3368 'info_dict': {
3369 'title': 'Uploads from Cauchemar',
3370 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3371 'uploader': 'Cauchemar',
3372 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3373 },
3374 'playlist_mincount': 1123,
3375 }, {
3376 'note': 'even larger playlist, 8832 videos',
3377 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3378 'only_matching': True,
3379 }, {
3380 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3381 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3382 'info_dict': {
3383 'title': 'Uploads from Interstellar Movie',
3384 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3385 'uploader': 'Interstellar Movie',
3386 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3387 },
3388 'playlist_mincount': 21,
3389 }, {
3390 'note': 'Playlist with "show unavailable videos" button',
3391 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3392 'info_dict': {
3393 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3394 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3395 'uploader': 'Phim Siêu Nhân Nhật Bản',
3396 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3397 },
3398 'playlist_mincount': 200,
3399 }, {
3400 'note': 'Playlist with unavailable videos in page 7',
3401 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3402 'info_dict': {
3403 'title': 'Uploads from BlankTV',
3404 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3405 'uploader': 'BlankTV',
3406 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3407 },
3408 'playlist_mincount': 1000,
3409 }, {
3410 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3411 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3412 'info_dict': {
3413 'title': 'Data Analysis with Dr Mike Pound',
3414 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3415 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3416 'uploader': 'Computerphile',
3417 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3418 },
3419 'playlist_mincount': 11,
3420 }, {
3421 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3422 'only_matching': True,
3423 }, {
3424 'note': 'Playlist URL that does not actually serve a playlist',
3425 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3426 'info_dict': {
3427 'id': 'FqZTN594JQw',
3428 'ext': 'webm',
3429 'title': "Smiley's People 01 detective, Adventure Series, Action",
3430 'uploader': 'STREEM',
3431 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3433 'upload_date': '20150526',
3434 'license': 'Standard YouTube License',
3435 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3436 'categories': ['People & Blogs'],
3437 'tags': list,
3438 'view_count': int,
3439 'like_count': int,
3440 'dislike_count': int,
3441 },
3442 'params': {
3443 'skip_download': True,
3444 },
3445 'skip': 'This video is not available.',
3446 'add_ie': [YoutubeIE.ie_key()],
3447 }, {
3448 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3449 'only_matching': True,
3450 }, {
3451 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3452 'only_matching': True,
3453 }, {
3454 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3455 'info_dict': {
3456 'id': '3yImotZU3tw', # This will keep changing
3457 'ext': 'mp4',
3458 'title': compat_str,
3459 'uploader': 'Sky News',
3460 'uploader_id': 'skynews',
3461 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3462 'upload_date': r're:\d{8}',
3463 'description': compat_str,
3464 'categories': ['News & Politics'],
3465 'tags': list,
3466 'like_count': int,
3467 'dislike_count': int,
3468 },
3469 'params': {
3470 'skip_download': True,
3471 },
3472 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3473 }, {
3474 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3475 'info_dict': {
3476 'id': 'a48o2S1cPoo',
3477 'ext': 'mp4',
3478 'title': 'The Young Turks - Live Main Show',
3479 'uploader': 'The Young Turks',
3480 'uploader_id': 'TheYoungTurks',
3481 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3482 'upload_date': '20150715',
3483 'license': 'Standard YouTube License',
3484 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3485 'categories': ['News & Politics'],
3486 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3487 'like_count': int,
3488 'dislike_count': int,
3489 },
3490 'params': {
3491 'skip_download': True,
3492 },
3493 'only_matching': True,
3494 }, {
3495 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3496 'only_matching': True,
3497 }, {
3498 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3499 'only_matching': True,
3500 }, {
3501 'note': 'A channel that is not live. Should raise error',
3502 'url': 'https://www.youtube.com/user/numberphile/live',
3503 'only_matching': True,
3504 }, {
3505 'url': 'https://www.youtube.com/feed/trending',
3506 'only_matching': True,
3507 }, {
3508 'url': 'https://www.youtube.com/feed/library',
3509 'only_matching': True,
3510 }, {
3511 'url': 'https://www.youtube.com/feed/history',
3512 'only_matching': True,
3513 }, {
3514 'url': 'https://www.youtube.com/feed/subscriptions',
3515 'only_matching': True,
3516 }, {
3517 'url': 'https://www.youtube.com/feed/watch_later',
3518 'only_matching': True,
3519 }, {
3520 'note': 'Recommended - redirects to home page',
3521 'url': 'https://www.youtube.com/feed/recommended',
3522 'only_matching': True,
3523 }, {
3524 'note': 'inline playlist with not always working continuations',
3525 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3526 'only_matching': True,
3527 }, {
3528 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3529 'only_matching': True,
3530 }, {
3531 'url': 'https://www.youtube.com/course',
3532 'only_matching': True,
3533 }, {
3534 'url': 'https://www.youtube.com/zsecurity',
3535 'only_matching': True,
3536 }, {
3537 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3538 'only_matching': True,
3539 }, {
3540 'url': 'https://www.youtube.com/TheYoungTurks/live',
3541 'only_matching': True,
3542 }, {
3543 'url': 'https://www.youtube.com/hashtag/cctv9',
3544 'info_dict': {
3545 'id': 'cctv9',
3546 'title': '#cctv9',
3547 },
3548 'playlist_mincount': 350,
3549 }, {
3550 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3551 'only_matching': True,
3552 }, {
3553 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3554 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3555 'only_matching': True
3556 }, {
3557 'note': '/browse/ should redirect to /channel/',
3558 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3559 'only_matching': True
3560 }, {
3561 'note': 'VLPL, should redirect to playlist?list=PL...',
3562 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3563 'info_dict': {
3564 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3565 'uploader': 'NoCopyrightSounds',
3566 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3567 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3568 'title': 'NCS Releases',
3569 },
3570 'playlist_mincount': 166,
3571 }, {
3572 'note': 'Topic, should redirect to playlist?list=UU...',
3573 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3574 'info_dict': {
3575 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3576 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3577 'title': 'Uploads from Royalty Free Music - Topic',
3578 'uploader': 'Royalty Free Music - Topic',
3579 },
3580 'expected_warnings': [
3581 'A channel/user page was given',
3582 'The URL does not have a videos tab',
3583 ],
3584 'playlist_mincount': 101,
3585 }, {
3586 'note': 'Topic without a UU playlist',
3587 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3588 'info_dict': {
3589 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3590 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3591 },
3592 'expected_warnings': [
3593 'A channel/user page was given',
3594 'The URL does not have a videos tab',
3595 'Falling back to channel URL',
3596 ],
3597 'playlist_mincount': 9,
3598 }, {
3599 'note': 'Youtube music Album',
3600 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3601 'info_dict': {
3602 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3603 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3604 },
3605 'playlist_count': 50,
3606 }, {
3607 'note': 'unlisted single video playlist',
3608 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3609 'info_dict': {
3610 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3611 'uploader': 'colethedj',
3612 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3613 'title': 'yt-dlp unlisted playlist test',
3614 'availability': 'unlisted'
3615 },
3616 'playlist_count': 1,
3617 }]
3618
3619 @classmethod
3620 def suitable(cls, url):
3621 return False if YoutubeIE.suitable(url) else super(
3622 YoutubeTabIE, cls).suitable(url)
3623
3624 def _extract_channel_id(self, webpage):
3625 channel_id = self._html_search_meta(
3626 'channelId', webpage, 'channel id', default=None)
3627 if channel_id:
3628 return channel_id
3629 channel_url = self._html_search_meta(
3630 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3631 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3632 'twitter:app:url:googleplay'), webpage, 'channel url')
3633 return self._search_regex(
3634 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3635 channel_url, 'channel id')
3636
3637 @staticmethod
3638 def _extract_basic_item_renderer(item):
3639 # Modified from _extract_grid_item_renderer
3640 known_basic_renderers = (
3641 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3642 )
3643 for key, renderer in item.items():
3644 if not isinstance(renderer, dict):
3645 continue
3646 elif key in known_basic_renderers:
3647 return renderer
3648 elif key.startswith('grid') and key.endswith('Renderer'):
3649 return renderer
3650
    def _grid_entries(self, grid_renderer):
        """Yield entries for each item of a gridRenderer.

        Each item may be a playlist, a video, a channel or a generic
        navigation endpoint; the first recognised kind wins per item.
        """
        for item in grid_renderer['items']:
            if not isinstance(item, dict):
                continue
            renderer = self._extract_basic_item_renderer(item)
            if not isinstance(renderer, dict):
                continue
            title = self._get_text(renderer, 'title')

            # playlist
            playlist_id = renderer.get('playlistId')
            if playlist_id:
                yield self.url_result(
                    'https://www.youtube.com/playlist?list=%s' % playlist_id,
                    ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                    video_title=title)
                continue
            # video
            video_id = renderer.get('videoId')
            if video_id:
                yield self._extract_video(renderer)
                continue
            # channel
            channel_id = renderer.get('channelId')
            if channel_id:
                yield self.url_result(
                    'https://www.youtube.com/channel/%s' % channel_id,
                    ie=YoutubeTabIE.ie_key(), video_title=title)
                continue
            # generic endpoint URL support
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if ep_url:
                # Hand the URL to the first extractor that recognises it
                for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                    if ie.suitable(ep_url):
                        yield self.url_result(
                            ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                        break
3690
3691 def _shelf_entries_from_content(self, shelf_renderer):
3692 content = shelf_renderer.get('content')
3693 if not isinstance(content, dict):
3694 return
3695 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3696 if renderer:
3697 # TODO: add support for nested playlists so each shelf is processed
3698 # as separate playlist
3699 # TODO: this includes only first N items
3700 for entry in self._grid_entries(renderer):
3701 yield entry
3702 renderer = content.get('horizontalListRenderer')
3703 if renderer:
3704 # TODO
3705 pass
3706
    def _shelf_entries(self, shelf_renderer, skip_channels=False):
        """Yield entries for a shelfRenderer.

        If the shelf links to its own page, yield that URL (optionally
        skipping links to other channels), then always fall back to
        extracting the shelf's inline content as well.
        """
        ep = try_get(
            shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str)
        shelf_url = urljoin('https://www.youtube.com', ep)
        if shelf_url:
            # Skipping links to another channels, note that checking for
            # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
            # will not work
            if skip_channels and '/channels?' in shelf_url:
                return
            title = self._get_text(shelf_renderer, 'title')
            yield self.url_result(shelf_url, video_title=title)
        # Shelf may not contain shelf URL, fallback to extraction from content
        for entry in self._shelf_entries_from_content(shelf_renderer):
            yield entry
3723
3724 def _playlist_entries(self, video_list_renderer):
3725 for content in video_list_renderer['contents']:
3726 if not isinstance(content, dict):
3727 continue
3728 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3729 if not isinstance(renderer, dict):
3730 continue
3731 video_id = renderer.get('videoId')
3732 if not video_id:
3733 continue
3734 yield self._extract_video(renderer)
3735
3736 def _rich_entries(self, rich_grid_renderer):
3737 renderer = try_get(
3738 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3739 video_id = renderer.get('videoId')
3740 if not video_id:
3741 return
3742 yield self._extract_video(renderer)
3743
3744 def _video_entry(self, video_renderer):
3745 video_id = video_renderer.get('videoId')
3746 if video_id:
3747 return self._extract_video(video_renderer)
3748
    def _post_thread_entries(self, post_thread_renderer):
        """Yield entries for a community post: the attached video/playlist
        plus any YouTube links found inline in the post text."""
        post_renderer = try_get(
            post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
        if not post_renderer:
            return
        # video attachment
        video_renderer = try_get(
            post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
        video_id = video_renderer.get('videoId')
        if video_id:
            entry = self._extract_video(video_renderer)
            if entry:
                yield entry
        # playlist attachment
        playlist_id = try_get(
            post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
        # inline video links
        runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
        for run in runs:
            if not isinstance(run, dict):
                continue
            ep_url = try_get(
                run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
            if not ep_url:
                continue
            if not YoutubeIE.suitable(ep_url):
                continue
            ep_video_id = YoutubeIE._match_id(ep_url)
            # Skip inline links that duplicate the attached video
            if video_id == ep_video_id:
                continue
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3784
3785 def _post_thread_continuation_entries(self, post_thread_continuation):
3786 contents = post_thread_continuation.get('contents')
3787 if not isinstance(contents, list):
3788 return
3789 for content in contents:
3790 renderer = content.get('backstagePostThreadRenderer')
3791 if not isinstance(renderer, dict):
3792 continue
3793 for entry in self._post_thread_entries(renderer):
3794 yield entry
3795
3796 r''' # unused
3797 def _rich_grid_entries(self, contents):
3798 for content in contents:
3799 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3800 if video_renderer:
3801 entry = self._video_entry(video_renderer)
3802 if entry:
3803 yield entry
3804 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield every entry of a tab, following continuations until exhausted.

        @param tab              selected tab renderer
        @param item_id          id used for logging/downloading
        @param identity_token   authenticated-session token (may be None)
        @param account_syncid   account sync id (may be None)
        @param ytcfg            ytcfg dict extracted from the webpage
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    # Dispatch on the first key of isr_content that we recognise
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        # Keep requesting continuation pages until none is advertised
        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Carry visitorData across pages for request consistency
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # Newer responses use onResponseReceived* instead of continuationContents
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # Re-wrap the items so the matching extractor can consume them
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3920
3921 @staticmethod
3922 def _extract_selected_tab(tabs):
3923 for tab in tabs:
3924 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3925 if renderer.get('selected') is True:
3926 return renderer
3927 else:
3928 raise ExtractorError('Unable to find selected tab')
3929
3930 @classmethod
3931 def _extract_uploader(cls, data):
3932 uploader = {}
3933 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3934 owner = try_get(
3935 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3936 if owner:
3937 uploader['uploader'] = owner.get('text')
3938 uploader['uploader_id'] = try_get(
3939 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3940 uploader['uploader_url'] = urljoin(
3941 'https://www.youtube.com/',
3942 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3943 return {k: v for k, v in uploader.items() if v is not None}
3944
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build a playlist result from a tabbed page (channel/playlist/feed)."""
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        # Both names are bound to the same empty list here; it is never mutated,
        # only rebound below, so the aliasing is harmless
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            # Not a channel page; try playlist metadata instead
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # Hashtag pages carry their title in the header instead of metadata
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # Playlist pages: pull the uploader info from the sidebar instead
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        ytcfg = self.extract_ytcfg(item_id, webpage)
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_identity_token(webpage, item_id),
                self._extract_account_syncid(ytcfg, data), ytcfg),
            **metadata)
4019
4020 def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
4021 first_id = last_id = None
4022 ytcfg = self.extract_ytcfg(playlist_id, webpage)
4023 headers = self.generate_api_headers(
4024 ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4025 identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
4026 for page_num in itertools.count(1):
4027 videos = list(self._playlist_entries(playlist))
4028 if not videos:
4029 return
4030 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
4031 if start >= len(videos):
4032 return
4033 for video in videos[start:]:
4034 if video['id'] == first_id:
4035 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
4036 return
4037 yield video
4038 first_id = first_id or videos[0]['id']
4039 last_id = videos[-1]['id']
4040 watch_endpoint = try_get(
4041 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
4042 query = {
4043 'playlistId': playlist_id,
4044 'videoId': watch_endpoint.get('videoId') or last_id,
4045 'index': watch_endpoint.get('index') or len(videos),
4046 'params': watch_endpoint.get('params') or 'OAE%3D'
4047 }
4048 response = self._extract_response(
4049 item_id='%s page %d' % (playlist_id, page_num),
4050 query=query, ep='next', headers=headers, ytcfg=ytcfg,
4051 check_get_keys='contents'
4052 )
4053 playlist = try_get(
4054 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4055
4056 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
4057 title = playlist.get('title') or try_get(
4058 data, lambda x: x['titleText']['simpleText'], compat_str)
4059 playlist_id = playlist.get('playlistId') or item_id
4060
4061 # Delegating everything except mix playlists to regular tab-based playlist URL
4062 playlist_url = urljoin(url, try_get(
4063 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4064 compat_str))
4065 if playlist_url and playlist_url != url:
4066 return self.url_result(
4067 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4068 video_title=title)
4069
4070 return self.playlist_result(
4071 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
4072 playlist_id=playlist_id, playlist_title=title)
4073
    def _extract_availability(self, data):
        """
        Gets the availability of a given playlist/tab.
        Note: Unless YouTube tells us explicitly, we do not assume it is public
        @param data: response
        """
        is_private = is_unlisted = None
        renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
        badge_labels = self._extract_badges(renderer)

        # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
        privacy_dropdown_entries = try_get(
            renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
        for renderer_dict in privacy_dropdown_entries:
            is_selected = try_get(
                renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
            if not is_selected:
                continue
            # Treat the selected dropdown label like a badge label
            label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
            if label:
                badge_labels.add(label.lower())
            break

        for badge_label in badge_labels:
            if badge_label == 'unlisted':
                is_unlisted = True
            elif badge_label == 'private':
                is_private = True
            elif badge_label == 'public':
                is_unlisted = is_private = False
        # The False args correspond to the other availability flags that a
        # playlist page cannot express (needs_premium etc.)
        return self._availability(is_private, False, False, False, is_unlisted)
4105
4106 @staticmethod
4107 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4108 sidebar_renderer = try_get(
4109 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4110 for item in sidebar_renderer:
4111 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4112 if renderer:
4113 return renderer
4114
    def _reload_with_unavailable_videos(self, item_id, data, webpage):
        """
        Get playlist with unavailable videos if the 'show unavailable videos' button exists.
        """
        browse_id = params = None
        renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
        if not renderer:
            return
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        # Look for the 'show unavailable videos' menu entry and grab its endpoint
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

        ytcfg = self.extract_ytcfg(item_id, webpage)
        headers = self.generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            identity_token=self._extract_identity_token(webpage, item_id=item_id),
            visitor_data=try_get(
                self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        # Fall back to the generic 'show unavailable' params / VL-prefixed browse id
        query = {
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        }
        return self._extract_response(
            item_id=item_id, headers=headers, query=query,
            check_get_keys='contents', fatal=False, ytcfg=ytcfg,
            note='Downloading API JSON with unavailable videos')
4153
4154 def _extract_webpage(self, url, item_id):
4155 retries = self.get_param('extractor_retries', 3)
4156 count = -1
4157 last_error = 'Incomplete yt initial data recieved'
4158 while count < retries:
4159 count += 1
4160 # Sometimes youtube returns a webpage with incomplete ytInitialData
4161 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4162 if count:
4163 self.report_warning('%s. Retrying ...' % last_error)
4164 webpage = self._download_webpage(
4165 url, item_id,
4166 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4167 data = self.extract_yt_initial_data(item_id, webpage)
4168 if data.get('contents') or data.get('currentVideoEndpoint'):
4169 break
4170 # Extract alerts here only when there is error
4171 self._extract_and_report_alerts(data)
4172 if count >= retries:
4173 raise ExtractorError(last_error)
4174 return webpage, data
4175
4176 @staticmethod
4177 def _smuggle_data(entries, data):
4178 for entry in entries:
4179 if data:
4180 entry['url'] = smuggle_url(entry['url'], data)
4181 yield entry
4182
4183 def _real_extract(self, url):
4184 url, smuggled_data = unsmuggle_url(url, {})
4185 if self.is_music_url(url):
4186 smuggled_data['is_music_url'] = True
4187 info_dict = self.__real_extract(url, smuggled_data)
4188 if info_dict.get('entries'):
4189 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4190 return info_dict
4191
4192 _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4193
    def __real_extract(self, url, smuggled_data):
        # Name-mangled implementation of _real_extract: resolves the many URL
        # shapes (channel/user, playlist, watch with list=, music URLs) to a
        # tab result, a playlist result, or a single-video url_result.
        item_id = self._match_id(url)
        # Normalize host so extraction always hits www.youtube.com
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Match against _url_re; absent groups are normalized to ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Rebuild the URL from the (possibly rewritten) parts and re-match
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            # Playlist worked; switch all state over to it
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4308
4309
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to the tab extractor for any URL it already handles
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        # Watch URLs with a video id belong to YoutubeIE, not here
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Canonicalize to a /playlist URL, preserving any existing query params
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4394
4395
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Rewrite short youtu.be?list= links to a full watch URL for YoutubeTabIE
        mobj = self._match_valid_url(url)
        video_id, playlist_id = mobj.group('id'), mobj.group('playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4434
4435
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Expand the ytuser: shorthand to a /user/ channel URL
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4449
4450
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos are exposed as the special "LL" playlist
        liked_list_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_list_url, ie=YoutubeTabIE.ie_key())
4468
4469
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None  # extra 'params' blob for the search request; set by subclasses
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* video results for *query*, following continuation tokens."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First response nests results differently from continuation responses
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    # Only plain video results are yielded; other renderers are skipped
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token found anywhere on this page: we are done
            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
4537
4538
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same as YoutubeSearchIE, but with results ordered newest-first (per IE_DESC)
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Pre-encoded search filter ('CAI=' URL-escaped) selecting this ordering
    _SEARCH_PARAMS = 'CAI%3D'
4544
4545
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        return cls._VALID_URL

    def _real_extract(self, url):
        # Pull the search terms (and optional 'sp' filter) out of the results URL
        params = parse_qs(url)
        search_terms = (params.get('search_query') or params.get('q'))[0]
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(search_terms, self._MAX_RESULTS)
4572
4573
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the extractor name from the subclass's feed name
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # Every feed lives under /feed/<name>; delegate to the tab extractor
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4590
4591
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch-later videos are exposed as the special "WL" playlist
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4604
4605
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the base class: the recommended feed also works logged-out
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4621
4622
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for /feed/subscriptions (extraction logic in the base class)
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4634
4635
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for /feed/history (extraction logic in the base class)
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4644
4645
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that lost their video id (usually an
    # unquoted '&' in the shell) and raises a helpful error message.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Fixed message: this project's executable is yt-dlp, not youtube-dl
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
4693
4694
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose video id is shorter than the required 11 chars
    # (typically a copy-paste truncation) and raises a descriptive error.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
            expected=True)