# Source: yt_dlp/extractor/youtube.py (yt-dlp)
# [youtube] Fix controversial videos when requested via API (#533)
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bool_or_none,
32 bytes_to_intlist,
33 clean_html,
34 dict_get,
35 datetime_from_str,
36 error_to_compat_str,
37 ExtractorError,
38 format_field,
39 float_or_none,
40 int_or_none,
41 intlist_to_bytes,
42 mimetype2ext,
43 parse_codecs,
44 parse_count,
45 parse_duration,
46 qualities,
47 remove_start,
48 smuggle_url,
49 str_or_none,
50 str_to_int,
51 traverse_obj,
52 try_get,
53 unescapeHTML,
54 unified_strdate,
55 unsmuggle_url,
56 update_url_query,
57 url_or_none,
58 urlencode_postdata,
59 urljoin,
60 variadic,
61 )
62
63
def parse_qs(url):
    """Return the query string of *url* parsed into a dict of value lists."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
66
67
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google account login endpoints (legacy username/password flow, broken)
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is the "TL" token extracted from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Path segments on youtube.com that can never be a channel/user name;
    # used in URL patterns to avoid matching reserved paths
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Known playlist ID prefixes and aliases (e.g. PL..., WL = Watch Later)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
88 def _login(self):
89 """
90 Attempt to log in to YouTube.
91 True is returned if successful or skipped.
92 False is returned if login failed.
93
94 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
95 """
96
97 def warn(message):
98 self.report_warning(message)
99
100 # username+password login is broken
101 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
102 self.raise_login_required(
103 'Login details are needed to download this content', method='cookies')
104 username, password = self._get_login_info()
105 if username:
106 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
107 return
108
109 # Everything below this is broken!
110 r'''
111 # No authentication to be performed
112 if username is None:
113 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
114 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
115 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
116 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
117 return True
118
119 login_page = self._download_webpage(
120 self._LOGIN_URL, None,
121 note='Downloading login page',
122 errnote='unable to fetch login page', fatal=False)
123 if login_page is False:
124 return
125
126 login_form = self._hidden_inputs(login_page)
127
128 def req(url, f_req, note, errnote):
129 data = login_form.copy()
130 data.update({
131 'pstMsg': 1,
132 'checkConnection': 'youtube',
133 'checkedDomains': 'youtube',
134 'hl': 'en',
135 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
136 'f.req': json.dumps(f_req),
137 'flowName': 'GlifWebSignIn',
138 'flowEntry': 'ServiceLogin',
139 # TODO: reverse actual botguard identifier generation algo
140 'bgRequest': '["identifier",""]',
141 })
142 return self._download_json(
143 url, None, note=note, errnote=errnote,
144 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
145 fatal=False,
146 data=urlencode_postdata(data), headers={
147 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
148 'Google-Accounts-XSRF': 1,
149 })
150
151 lookup_req = [
152 username,
153 None, [], None, 'US', None, None, 2, False, True,
154 [
155 None, None,
156 [2, 1, None, 1,
157 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
158 None, [], 4],
159 1, [None, None, []], None, None, None, True
160 ],
161 username,
162 ]
163
164 lookup_results = req(
165 self._LOOKUP_URL, lookup_req,
166 'Looking up account info', 'Unable to look up account info')
167
168 if lookup_results is False:
169 return False
170
171 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
172 if not user_hash:
173 warn('Unable to extract user hash')
174 return False
175
176 challenge_req = [
177 user_hash,
178 None, 1, None, [1, None, None, None, [password, None, True]],
179 [
180 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
181 1, [None, None, []], None, None, None, True
182 ]]
183
184 challenge_results = req(
185 self._CHALLENGE_URL, challenge_req,
186 'Logging in', 'Unable to log in')
187
188 if challenge_results is False:
189 return
190
191 login_res = try_get(challenge_results, lambda x: x[0][5], list)
192 if login_res:
193 login_msg = try_get(login_res, lambda x: x[5], compat_str)
194 warn(
195 'Unable to login: %s' % 'Invalid password'
196 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
197 return False
198
199 res = try_get(challenge_results, lambda x: x[0][-1], list)
200 if not res:
201 warn('Unable to extract result entry')
202 return False
203
204 login_challenge = try_get(res, lambda x: x[0][0], list)
205 if login_challenge:
206 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
207 if challenge_str == 'TWO_STEP_VERIFICATION':
208 # SEND_SUCCESS - TFA code has been successfully sent to phone
209 # QUOTA_EXCEEDED - reached the limit of TFA codes
210 status = try_get(login_challenge, lambda x: x[5], compat_str)
211 if status == 'QUOTA_EXCEEDED':
212 warn('Exceeded the limit of TFA codes, try later')
213 return False
214
215 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
216 if not tl:
217 warn('Unable to extract TL')
218 return False
219
220 tfa_code = self._get_tfa_info('2-step verification code')
221
222 if not tfa_code:
223 warn(
224 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
225 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
226 return False
227
228 tfa_code = remove_start(tfa_code, 'G-')
229
230 tfa_req = [
231 user_hash, None, 2, None,
232 [
233 9, None, None, None, None, None, None, None,
234 [None, tfa_code, True, 2]
235 ]]
236
237 tfa_results = req(
238 self._TFA_URL.format(tl), tfa_req,
239 'Submitting TFA code', 'Unable to submit TFA code')
240
241 if tfa_results is False:
242 return False
243
244 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
245 if tfa_res:
246 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
247 warn(
248 'Unable to finish TFA: %s' % 'Invalid TFA code'
249 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
250 return False
251
252 check_cookie_url = try_get(
253 tfa_results, lambda x: x[0][-1][2], compat_str)
254 else:
255 CHALLENGES = {
256 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
257 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
258 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
259 }
260 challenge = CHALLENGES.get(
261 challenge_str,
262 '%s returned error %s.' % (self.IE_NAME, challenge_str))
263 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
264 return False
265 else:
266 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
267
268 if not check_cookie_url:
269 warn('Unable to extract CheckCookie URL')
270 return False
271
272 check_cookie_results = self._download_webpage(
273 check_cookie_url, None, 'Checking cookie', fatal=False)
274
275 if check_cookie_results is False:
276 return False
277
278 if 'https://myaccount.google.com/' not in check_cookie_results:
279 warn('Unable to log in')
280 return False
281
282 return True
283 '''
284
285 def _initialize_consent(self):
286 cookies = self._get_cookies('https://www.youtube.com/')
287 if cookies.get('__Secure-3PSID'):
288 return
289 consent_id = None
290 consent = cookies.get('CONSENT')
291 if consent:
292 if 'YES' in consent.value:
293 return
294 consent_id = self._search_regex(
295 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
296 if not consent_id:
297 consent_id = random.randint(100, 999)
298 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
299
300 def _real_initialize(self):
301 self._initialize_consent()
302 if self._downloader is None:
303 return
304 if not self._login():
305 return
306
    # Regexes locating the JSON blobs YouTube embeds in watch/browse pages
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Terminator marking the end of an inline JSON assignment
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    # Built-in innertube client configurations, keyed by INNERTUBE_CLIENT_NAME.
    # Used as a fallback whenever a page ytcfg is unavailable or incomplete.
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 3
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 55
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 21
        }
    }

    # API hostname per innertube client; clients not listed use 'WEB'
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }
404
405 def _get_default_ytcfg(self, client='WEB'):
406 if client in self._YT_DEFAULT_YTCFGS:
407 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
408 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
409 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
410
411 def _get_innertube_host(self, client='WEB'):
412 return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
413
414 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
415 # try_get but with fallback to default ytcfg client values when present
416 _func = lambda y: try_get(y, getter, expected_type)
417 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
418
419 def _extract_client_name(self, ytcfg, default_client='WEB'):
420 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
421
422 @staticmethod
423 def _extract_session_index(ytcfg):
424 return int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
425
426 def _extract_client_version(self, ytcfg, default_client='WEB'):
427 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
428
429 def _extract_api_key(self, ytcfg=None, default_client='WEB'):
430 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
431
432 def _extract_context(self, ytcfg=None, default_client='WEB'):
433 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
434 context = _get_context(ytcfg)
435 if context:
436 return context
437
438 context = _get_context(self._get_default_ytcfg(default_client))
439 if not ytcfg:
440 return context
441
442 # Recreate the client context (required)
443 context['client'].update({
444 'clientVersion': self._extract_client_version(ytcfg, default_client),
445 'clientName': self._extract_client_name(ytcfg, default_client),
446 })
447 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
448 if visitor_data:
449 context['client']['visitorData'] = visitor_data
450 return context
451
452 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
453 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
454 # See: https://github.com/yt-dlp/yt-dlp/issues/393
455 yt_cookies = self._get_cookies('https://www.youtube.com')
456 sapisid_cookie = dict_get(
457 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
458 if sapisid_cookie is None:
459 return
460 time_now = round(time.time())
461 # SAPISID cookie is required if not already present
462 if not yt_cookies.get('SAPISID'):
463 self._set_cookie(
464 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
465 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
466 sapisidhash = hashlib.sha1(
467 f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
468 return f'SAPISIDHASH {time_now}_{sapisidhash}'
469
470 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
471 note='Downloading API JSON', errnote='Unable to download API page',
472 context=None, api_key=None, api_hostname=None, default_client='WEB'):
473
474 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
475 data.update(query)
476 real_headers = self._generate_api_headers(client=default_client)
477 real_headers.update({'content-type': 'application/json'})
478 if headers:
479 real_headers.update(headers)
480 return self._download_json(
481 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
482 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
483 data=json.dumps(data).encode('utf8'), headers=real_headers,
484 query={'key': api_key or self._extract_api_key()})
485
486 def _extract_yt_initial_data(self, video_id, webpage):
487 return self._parse_json(
488 self._search_regex(
489 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
490 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
491 video_id)
492
493 def _extract_identity_token(self, webpage, item_id):
494 ytcfg = self._extract_ytcfg(item_id, webpage)
495 if ytcfg:
496 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
497 if token:
498 return token
499 return self._search_regex(
500 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
501 'identity token', default=None)
502
503 @staticmethod
504 def _extract_account_syncid(*args):
505 """
506 Extract syncId required to download private playlists of secondary channels
507 @params response and/or ytcfg
508 """
509 for data in args:
510 # ytcfg includes channel_syncid if on secondary channel
511 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
512 if delegated_sid:
513 return delegated_sid
514 sync_ids = (try_get(
515 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
516 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
517 if len(sync_ids) >= 2 and sync_ids[1]:
518 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
519 # and just "user_syncid||" for primary channel. We only want the channel_syncid
520 return sync_ids[0]
521
522 def _extract_ytcfg(self, video_id, webpage):
523 if not webpage:
524 return {}
525 return self._parse_json(
526 self._search_regex(
527 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
528 default='{}'), video_id, fatal=False) or {}
529
    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
                              visitor_data=None, api_hostname=None, client='WEB', session_index=None):
        # Build the HTTP headers for an innertube API request, mixing values
        # from the page ytcfg (when given) with the per-client defaults.
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
            'Origin': origin
        }
        # Fall back to the visitorData embedded in the ytcfg context
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
        if session_index is None and ytcfg:
            session_index = self._extract_session_index(ytcfg)
        if account_syncid or session_index is not None:
            # Multi-login sessions select the active account via X-Goog-AuthUser
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            # Cookie-based SAPISIDHASH auth; X-Origin must match the hashed origin
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers
557
558 @staticmethod
559 def _build_api_continuation_query(continuation, ctp=None):
560 query = {
561 'continuation': continuation
562 }
563 # TODO: Inconsistency with clickTrackingParams.
564 # Currently we have a fixed ctp contained within context (from ytcfg)
565 # and a ctp in root query for continuation.
566 if ctp:
567 query['clickTracking'] = {'clickTrackingParams': ctp}
568 return query
569
570 @classmethod
571 def _extract_next_continuation_data(cls, renderer):
572 next_continuation = try_get(
573 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
574 lambda x: x['continuation']['reloadContinuationData']), dict)
575 if not next_continuation:
576 return
577 continuation = next_continuation.get('continuation')
578 if not continuation:
579 return
580 ctp = next_continuation.get('clickTrackingParams')
581 return cls._build_api_continuation_query(continuation, ctp)
582
583 @classmethod
584 def _extract_continuation_ep_data(cls, continuation_ep: dict):
585 if isinstance(continuation_ep, dict):
586 continuation = try_get(
587 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
588 if not continuation:
589 return
590 ctp = continuation_ep.get('clickTrackingParams')
591 return cls._build_api_continuation_query(continuation, ctp)
592
593 @classmethod
594 def _extract_continuation(cls, renderer):
595 next_continuation = cls._extract_next_continuation_data(renderer)
596 if next_continuation:
597 return next_continuation
598
599 contents = []
600 for key in ('contents', 'items'):
601 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
602
603 for content in contents:
604 if not isinstance(content, dict):
605 continue
606 continuation_ep = try_get(
607 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
608 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
609 dict)
610 continuation = cls._extract_continuation_ep_data(continuation_ep)
611 if continuation:
612 return continuation
613
614 @classmethod
615 def _extract_alerts(cls, data):
616 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
617 if not isinstance(alert_dict, dict):
618 continue
619 for alert in alert_dict.values():
620 alert_type = alert.get('type')
621 if not alert_type:
622 continue
623 message = cls._get_text(alert.get('text'))
624 if message:
625 yield alert_type, message
626
627 def _report_alerts(self, alerts, expected=True):
628 errors = []
629 warnings = []
630 for alert_type, alert_message in alerts:
631 if alert_type.lower() == 'error':
632 errors.append([alert_type, alert_message])
633 else:
634 warnings.append([alert_type, alert_message])
635
636 for alert_type, alert_message in (warnings + errors[:-1]):
637 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
638 if errors:
639 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
640
641 def _extract_and_report_alerts(self, data, *args, **kwargs):
642 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
643
644 def _extract_badges(self, renderer: dict):
645 badges = set()
646 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
647 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
648 if label:
649 badges.add(label.lower())
650 return badges
651
    @staticmethod
    def _get_text(data, getter=None, max_runs=None):
        # Extract plain text from YouTube's text objects, which are either
        # {'simpleText': ...} or {'runs': [{'text': ...}, ...]}.
        # *getter* is a callable (or list of callables, tried in order) that
        # navigates from *data* to the text object; None means *data* itself.
        # *max_runs* caps how many runs are joined.
        for get in variadic(getter):
            d = try_get(data, get) if get is not None else data
            text = try_get(d, lambda x: x['simpleText'], compat_str)
            if text:
                return text
            runs = try_get(d, lambda x: x['runs'], list) or []
            if not runs and isinstance(d, list):
                # A bare list is treated as the runs themselves
                runs = d

            def get_runs(runs):
                for run in runs[:min(len(runs), max_runs or len(runs))]:
                    yield try_get(run, lambda x: x['text'], compat_str) or ''

            text = ''.join(get_runs(runs))
            if text:
                return text
670
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        # Call the innertube API with retries. Retries on intermittent HTTP
        # 500/503/404 errors and on structurally incomplete responses (those
        # missing every key in *check_get_keys*). Returns the parsed response,
        # or None when non-fatal and all retries are exhausted.
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
726
727 @staticmethod
728 def is_music_url(url):
729 return re.match(r'https?://music\.youtube\.com/', url) is not None
730
731 def _extract_video(self, renderer):
732 video_id = renderer.get('videoId')
733 title = self._get_text(renderer.get('title'))
734 description = self._get_text(renderer.get('descriptionSnippet'))
735 duration = parse_duration(self._get_text(renderer.get('lengthText')))
736 view_count_text = self._get_text(renderer.get('viewCountText')) or ''
737 view_count = str_to_int(self._search_regex(
738 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
739 'view count', default=None))
740
741 uploader = self._get_text(renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText']))
742
743 return {
744 '_type': 'url',
745 'ie_key': YoutubeIE.ie_key(),
746 'id': video_id,
747 'url': video_id,
748 'title': title,
749 'description': description,
750 'duration': duration,
751 'view_count': view_count,
752 'uploader': uploader,
753 }
754
755
756 class YoutubeIE(YoutubeBaseInfoExtractor):
757 IE_DESC = 'YouTube.com'
758 _INVIDIOUS_SITES = (
759 # invidious-redirect websites
760 r'(?:www\.)?redirect\.invidious\.io',
761 r'(?:(?:www|dev)\.)?invidio\.us',
762 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
763 r'(?:www\.)?invidious\.pussthecat\.org',
764 r'(?:www\.)?invidious\.zee\.li',
765 r'(?:www\.)?invidious\.ethibox\.fr',
766 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
767 # youtube-dl invidious instances list
768 r'(?:(?:www|no)\.)?invidiou\.sh',
769 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
770 r'(?:www\.)?invidious\.kabi\.tk',
771 r'(?:www\.)?invidious\.mastodon\.host',
772 r'(?:www\.)?invidious\.zapashcanon\.fr',
773 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
774 r'(?:www\.)?invidious\.tinfoil-hat\.net',
775 r'(?:www\.)?invidious\.himiko\.cloud',
776 r'(?:www\.)?invidious\.reallyancient\.tech',
777 r'(?:www\.)?invidious\.tube',
778 r'(?:www\.)?invidiou\.site',
779 r'(?:www\.)?invidious\.site',
780 r'(?:www\.)?invidious\.xyz',
781 r'(?:www\.)?invidious\.nixnet\.xyz',
782 r'(?:www\.)?invidious\.048596\.xyz',
783 r'(?:www\.)?invidious\.drycat\.fr',
784 r'(?:www\.)?inv\.skyn3t\.in',
785 r'(?:www\.)?tube\.poal\.co',
786 r'(?:www\.)?tube\.connect\.cafe',
787 r'(?:www\.)?vid\.wxzm\.sx',
788 r'(?:www\.)?vid\.mint\.lgbt',
789 r'(?:www\.)?vid\.puffyan\.us',
790 r'(?:www\.)?yewtu\.be',
791 r'(?:www\.)?yt\.elukerio\.org',
792 r'(?:www\.)?yt\.lelux\.fi',
793 r'(?:www\.)?invidious\.ggc-project\.de',
794 r'(?:www\.)?yt\.maisputain\.ovh',
795 r'(?:www\.)?ytprivate\.com',
796 r'(?:www\.)?invidious\.13ad\.de',
797 r'(?:www\.)?invidious\.toot\.koeln',
798 r'(?:www\.)?invidious\.fdn\.fr',
799 r'(?:www\.)?watch\.nettohikari\.com',
800 r'(?:www\.)?invidious\.namazso\.eu',
801 r'(?:www\.)?invidious\.silkky\.cloud',
802 r'(?:www\.)?invidious\.exonip\.de',
803 r'(?:www\.)?invidious\.riverside\.rocks',
804 r'(?:www\.)?invidious\.blamefran\.net',
805 r'(?:www\.)?invidious\.moomoo\.de',
806 r'(?:www\.)?ytb\.trom\.tf',
807 r'(?:www\.)?yt\.cyberhost\.uk',
808 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
809 r'(?:www\.)?qklhadlycap4cnod\.onion',
810 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
811 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
812 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
813 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
814 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
815 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
816 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
817 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
818 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
819 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
820 )
821 _VALID_URL = r"""(?x)^
822 (
823 (?:https?://|//) # http(s):// or protocol-independent URL
824 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
825 (?:www\.)?deturl\.com/www\.youtube\.com|
826 (?:www\.)?pwnyoutube\.com|
827 (?:www\.)?hooktube\.com|
828 (?:www\.)?yourepeat\.com|
829 tube\.majestyc\.net|
830 %(invidious)s|
831 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
832 (?:.*?\#/)? # handle anchor (#/) redirect urls
833 (?: # the various things that can precede the ID:
834 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
835 |(?: # or the v= param in all its forms
836 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
837 (?:\?|\#!?) # the params delimiter ? or # or #!
838 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
839 v=
840 )
841 ))
842 |(?:
843 youtu\.be| # just youtu.be/xxxx
844 vid\.plus| # or vid.plus/xxxx
845 zwearz\.com/watch| # or zwearz.com/watch/xxxx
846 %(invidious)s
847 )/
848 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
849 )
850 )? # all until now is optional -> you can pass the naked ID
851 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
852 (?(1).+)? # if we found the ID, everything can follow
853 (?:\#|$)""" % {
854 'invidious': '|'.join(_INVIDIOUS_SITES),
855 }
856 _PLAYER_INFO_RE = (
857 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
858 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
859 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
860 )
    # Static itag -> format metadata table.
    # NOTE(review): presumably merged into the format dicts built during
    # extraction to fill in fields the stream URLs don't carry — the code that
    # consumes this table is outside this chunk; confirm against it.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    }
    # Subtitle formats requested for captions
    # NOTE(review): the consuming caption code is outside this chunk — confirm
    # whether order expresses a preference
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Playability-status messages that (presumably) identify an age-gated
    # video — verify against the playability-status handling code
    _AGE_GATE_REASONS = (
        'Sign in to confirm your age',
        'This video may be inappropriate for some users.',
        'Sorry, this content is age-restricted.')

    # NOTE(review): disables InfoExtractor's generic geo-bypass machinery
    # for this extractor — confirm intent against the base class
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
    # Test-suite fixtures: each entry pairs a URL with the metadata the
    # extractor is expected to produce ('only_matching' entries only check
    # that _VALID_URL matches)
    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
                'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
                'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
                'upload_date': '20121002',
                'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
                'tags': ['youtube-dl'],
                'duration': 10,
                'view_count': int,
                'like_count': int,
                'dislike_count': int,
                'start_time': 1,
                'end_time': 9,
            }
        },
        {
            'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
            'note': 'Embed-only video (#1746)',
            'info_dict': {
                'id': 'yZIXLfi8CZQ',
                'ext': 'mp4',
                'upload_date': '20120608',
                'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
                'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
                'uploader': 'SET India',
                'uploader_id': 'setindia',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
                'age_limit': 18,
            },
            'skip': 'Private video',
        },
        {
            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
            'note': 'Use the first video ID in the URL',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
                'upload_date': '20121002',
                'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
                'tags': ['youtube-dl'],
                'duration': 10,
                'view_count': int,
                'like_count': int,
                'dislike_count': int,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
            'note': '256k DASH audio (format 141) via DASH manifest',
            'info_dict': {
                'id': 'a9LDPn-MO4I',
                'ext': 'm4a',
                'upload_date': '20121002',
                'uploader_id': '8KVIDEO',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
                'description': '',
                'uploader': '8KVIDEO',
                'title': 'UHDTV TEST 8K VIDEO.mp4'
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141',
            },
            'skip': 'format 141 not served anymore',
        },
        # DASH manifest with encrypted signature
        {
            'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
            'info_dict': {
                'id': 'IB3lcPjvWLA',
                'ext': 'm4a',
                'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
                'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
                'duration': 244,
                'uploader': 'AfrojackVEVO',
                'uploader_id': 'AfrojackVEVO',
                'upload_date': '20131011',
                'abr': 129.495,
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141/bestaudio[ext=m4a]',
            },
        },
        # Normal age-gate video (embed allowed)
        {
            'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
            'info_dict': {
                'id': 'HtVdAasjOgU',
                'ext': 'mp4',
                'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
                'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                'duration': 142,
                'uploader': 'The Witcher',
                'uploader_id': 'WitcherGame',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
                'upload_date': '20140605',
                'age_limit': 18,
            },
        },
        # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
        # YouTube Red ad is not captured for creator
        {
            'url': '__2ABJjxzNo',
            'info_dict': {
                'id': '__2ABJjxzNo',
                'ext': 'mp4',
                'duration': 266,
                'upload_date': '20100430',
                'uploader_id': 'deadmau5',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
                'creator': 'deadmau5',
                'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
                'uploader': 'deadmau5',
                'title': 'Deadmau5 - Some Chords (HD)',
                'alt_title': 'Some Chords',
            },
            'expected_warnings': [
                'DASH manifest missing',
            ]
        },
        # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
        {
            'url': 'lqQg6PlCWgI',
            'info_dict': {
                'id': 'lqQg6PlCWgI',
                'ext': 'mp4',
                'duration': 6085,
                'upload_date': '20150827',
                'uploader_id': 'olympic',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
                'description': 'HO09 - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
                'uploader': 'Olympic',
                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
            },
            'params': {
                'skip_download': 'requires avconv',
            }
        },
        # Non-square pixels
        {
            'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
            'info_dict': {
                'id': '_b-2C3KPAM0',
                'ext': 'mp4',
                'stretched_ratio': 16 / 9.,
                'duration': 85,
                'upload_date': '20110310',
                'uploader_id': 'AllenMeow',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
                'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
                'uploader': '孫ᄋᄅ',
                'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
            },
        },
        # url_encoded_fmt_stream_map is empty string
        {
            'url': 'qEJwOuvDf7I',
            'info_dict': {
                'id': 'qEJwOuvDf7I',
                'ext': 'webm',
                'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
                'description': '',
                'upload_date': '20150404',
                'uploader_id': 'spbelect',
                'uploader': 'Наблюдатели Петербурга',
            },
            'params': {
                'skip_download': 'requires avconv',
            },
            'skip': 'This live event has ended.',
        },
        # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
        {
            'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
            'info_dict': {
                'id': 'FIl7x6_3R5Y',
                'ext': 'webm',
                'title': 'md5:7b81415841e02ecd4313668cde88737a',
                'description': 'md5:116377fd2963b81ec4ce64b542173306',
                'duration': 220,
                'upload_date': '20150625',
                'uploader_id': 'dorappi2000',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
                'uploader': 'dorappi2000',
                'formats': 'mincount:31',
            },
            'skip': 'not actual anymore',
        },
        # DASH manifest with segment_list
        {
            'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
            'md5': '8ce563a1d667b599d21064e982ab9e31',
            'info_dict': {
                'id': 'CsmdDsKjzN8',
                'ext': 'mp4',
                'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
                'uploader': 'Airtek',
                'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
                'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
                'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '135',  # bestvideo
            },
            'skip': 'This live event has ended.',
        },
        {
            # Multifeed videos (multiple cameras), URL is for Main Camera
            'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
            'info_dict': {
                'id': 'jvGDaLqkpTg',
                'title': 'Tom Clancy Free Weekend Rainbow Whatever',
                'description': 'md5:e03b909557865076822aa169218d6a5d',
            },
            'playlist': [{
                'info_dict': {
                    'id': 'jvGDaLqkpTg',
                    'ext': 'mp4',
                    'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
                    'description': 'md5:e03b909557865076822aa169218d6a5d',
                    'duration': 10643,
                    'upload_date': '20161111',
                    'uploader': 'Team PGP',
                    'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
                },
            }, {
                'info_dict': {
                    'id': '3AKt1R1aDnw',
                    'ext': 'mp4',
                    'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
                    'description': 'md5:e03b909557865076822aa169218d6a5d',
                    'duration': 10991,
                    'upload_date': '20161111',
                    'uploader': 'Team PGP',
                    'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
                },
            }, {
                'info_dict': {
                    'id': 'RtAMM00gpVc',
                    'ext': 'mp4',
                    'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
                    'description': 'md5:e03b909557865076822aa169218d6a5d',
                    'duration': 10995,
                    'upload_date': '20161111',
                    'uploader': 'Team PGP',
                    'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
                },
            }, {
                'info_dict': {
                    'id': '6N2fdlP3C5U',
                    'ext': 'mp4',
                    'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
                    'description': 'md5:e03b909557865076822aa169218d6a5d',
                    'duration': 10990,
                    'upload_date': '20161111',
                    'uploader': 'Team PGP',
                    'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
                },
            }],
            'params': {
                'skip_download': True,
            },
        },
        {
            # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
            'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
            'info_dict': {
                'id': 'gVfLd0zydlo',
                'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
            },
            'playlist_count': 2,
            'skip': 'Not multifeed anymore',
        },
        {
            'url': 'https://vid.plus/FlRa-iH7PGw',
            'only_matching': True,
        },
        {
            'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
            'only_matching': True,
        },
        {
            # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
            # Also tests cut-off URL expansion in video description (see
            # https://github.com/ytdl-org/youtube-dl/issues/1892,
            # https://github.com/ytdl-org/youtube-dl/issues/8164)
            'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
            'info_dict': {
                'id': 'lsguqyKfVQg',
                'ext': 'mp4',
                'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
                'alt_title': 'Dark Walk - Position Music',
                'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
                'duration': 133,
                'upload_date': '20151119',
                'uploader_id': 'IronSoulElf',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
                'uploader': 'IronSoulElf',
                'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
                'track': 'Dark Walk - Position Music',
                'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
                'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
            'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
            'only_matching': True,
        },
        {
            # Video with yt:stretch=17:0
            'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
            'info_dict': {
                'id': 'Q39EVAstoRM',
                'ext': 'mp4',
                'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
                'description': 'md5:ee18a25c350637c8faff806845bddee9',
                'upload_date': '20151107',
                'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
                'uploader': 'CH GAMER DROID',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'This video does not exist.',
        },
        {
            # Video with incomplete 'yt:stretch=16:'
            'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
            'only_matching': True,
        },
        {
            # Video licensed under Creative Commons
            'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
            'info_dict': {
                'id': 'M4gD1WSo5mA',
                'ext': 'mp4',
                'title': 'md5:e41008789470fc2533a3252216f1c1d1',
                'description': 'md5:a677553cf0840649b731a3024aeff4cc',
                'duration': 721,
                'upload_date': '20150127',
                'uploader_id': 'BerkmanCenter',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
                'uploader': 'The Berkman Klein Center for Internet & Society',
                'license': 'Creative Commons Attribution license (reuse allowed)',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # Channel-like uploader_url
            'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
            'info_dict': {
                'id': 'eQcmzGIKrzg',
                'ext': 'mp4',
                'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
                'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
                'duration': 4060,
                'upload_date': '20151119',
                'uploader': 'Bernie Sanders',
                'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
                'license': 'Creative Commons Attribution license (reuse allowed)',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
            'only_matching': True,
        },
        {
            # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
            'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
            'only_matching': True,
        },
        {
            # Rental video preview
            'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
            'info_dict': {
                'id': 'uGpuVWrhIzE',
                'ext': 'mp4',
                'title': 'Piku - Trailer',
                'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
                'upload_date': '20150811',
                'uploader': 'FlixMatrix',
                'uploader_id': 'FlixMatrixKaravan',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
                'license': 'Standard YouTube License',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'This video is not available.',
        },
        {
            # YouTube Red video with episode data
            'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
            'info_dict': {
                'id': 'iqKdEhx-dD4',
                'ext': 'mp4',
                'title': 'Isolation - Mind Field (Ep 1)',
                'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
                'duration': 2085,
                'upload_date': '20170118',
                'uploader': 'Vsauce',
                'uploader_id': 'Vsauce',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
                'series': 'Mind Field',
                'season_number': 1,
                'episode_number': 1,
            },
            'params': {
                'skip_download': True,
            },
            'expected_warnings': [
                'Skipping DASH manifest',
            ],
        },
        {
            # The following content has been identified by the YouTube community
            # as inappropriate or offensive to some audiences.
            'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
            'info_dict': {
                'id': '6SJNVb0GnPI',
                'ext': 'mp4',
                'title': 'Race Differences in Intelligence',
                'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
                'duration': 965,
                'upload_date': '20140124',
                'uploader': 'New Century Foundation',
                'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
        },
        {
            # itag 212
            'url': '1t24XAntNCY',
            'only_matching': True,
        },
        {
            # geo restricted to JP
            'url': 'sJL6WA-aGkQ',
            'only_matching': True,
        },
        {
            'url': 'https://invidio.us/watch?v=BaW_jenozKc',
            'only_matching': True,
        },
        {
            'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
            'only_matching': True,
        },
        {
            # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
            'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
            'only_matching': True,
        },
        {
            # DRM protected
            'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
            'only_matching': True,
        },
        {
            # Video with unsupported adaptive stream type formats
            'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
            'info_dict': {
                'id': 'Z4Vy8R84T1U',
                'ext': 'mp4',
                'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
                'duration': 433,
                'upload_date': '20130923',
                'uploader': 'Amelia Putri Harwita',
                'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
                'formats': 'maxcount:10',
            },
            'params': {
                'skip_download': True,
                'youtube_include_dash_manifest': False,
            },
            'skip': 'not actual anymore',
        },
        {
            # Youtube Music Auto-generated description
            'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
            'info_dict': {
                'id': 'MgNrAu2pzNs',
                'ext': 'mp4',
                'title': 'Voyeur Girl',
                'description': 'md5:7ae382a65843d6df2685993e90a8628f',
                'upload_date': '20190312',
                'uploader': 'Stephen - Topic',
                'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
                'artist': 'Stephen',
                'track': 'Voyeur Girl',
                'album': 'it\'s too much love to know my dear',
                'release_date': '20190313',
                'release_year': 2019,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
            'only_matching': True,
        },
        {
            # invalid -> valid video id redirection
            'url': 'DJztXj2GPfl',
            'info_dict': {
                'id': 'DJztXj2GPfk',
                'ext': 'mp4',
                'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
                'description': 'md5:bf577a41da97918e94fa9798d9228825',
                'upload_date': '20090125',
                'uploader': 'Prochorowka',
                'uploader_id': 'Prochorowka',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
                'artist': 'Panjabi MC',
                'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
                'album': 'Beware of the Boys (Mundian To Bach Ke)',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'Video unavailable',
        },
        {
            # empty description results in an empty string
            'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
            'info_dict': {
                'id': 'x41yOUIvK2k',
                'ext': 'mp4',
                'title': 'IMG 3456',
                'description': '',
                'upload_date': '20170613',
                'uploader_id': 'ElevageOrVert',
                'uploader': 'ElevageOrVert',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # with '};' inside yt initial data (see [1])
            # see [2] for an example with '};' inside ytInitialPlayerResponse
            # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
            # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
            'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
            'info_dict': {
                'id': 'CHqg6qOn4no',
                'ext': 'mp4',
                'title': 'Part 77   Sort a list of simple types in c#',
                'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
                'upload_date': '20130831',
                'uploader_id': 'kudvenkat',
                'uploader': 'kudvenkat',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # another example of '};' in ytInitialData
            'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
            'only_matching': True,
        },
        {
            # https://github.com/ytdl-org/youtube-dl/pull/28094
            'url': 'OtqTfy26tG0',
            'info_dict': {
                'id': 'OtqTfy26tG0',
                'ext': 'mp4',
                'title': 'Burn Out',
                'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
                'upload_date': '20141120',
                'uploader': 'The Cinematic Orchestra - Topic',
                'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
                'artist': 'The Cinematic Orchestra',
                'track': 'Burn Out',
                'album': 'Every Day',
                # NOTE(review): 'release_data' looks like a typo for
                # 'release_date' — confirm against the extractor's output keys
                'release_data': None,
                'release_year': None,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # controversial video, only works with bpctr when authenticated with cookies
            'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
            'only_matching': True,
        },
        {
            # controversial video, requires bpctr/contentCheckOk
            'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
            'info_dict': {
                'id': 'SZJvDhaSDnc',
                'ext': 'mp4',
                'title': 'San Diego teen commits suicide after bullying over embarrassing video',
                'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
                'uploader': 'CBS This Morning',
                'upload_date': '20140716',
                'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
            }
        },
        {
            # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
            'url': 'cBvYw8_A0vQ',
            'info_dict': {
                'id': 'cBvYw8_A0vQ',
                'ext': 'mp4',
                'title': '4K Ueno Okachimachi  Street  Scenes  上野御徒町歩き',
                'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
                'upload_date': '20201120',
                'uploader': 'Walk around Japan',
                'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
            },
            'params': {
                'skip_download': True,
            },
        }, {
            # Has multiple audio streams
            'url': 'WaOKSUlf4TM',
            'only_matching': True
        }, {
            # Requires Premium: has format 141 when requested using YTM url
            'url': 'https://music.youtube.com/watch?v=XclachpHxis',
            'only_matching': True
        }, {
            # multiple subtitles with same lang_code
            'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
            'only_matching': True,
        }, {
            # Force use android client fallback
            'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
            'info_dict': {
                'id': 'YOelRv7fMxY',
                'title': 'Digging a Secret Tunnel from my Workshop',
                'ext': '3gp',
                'upload_date': '20210624',
                'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
                'uploader': 'colinfurze',
                'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
                'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
            },
            'params': {
                'format': '17',  # 3gp format available on android
                'extractor_args': {'youtube': {'player_client': ['android']}},
            },
        },
        {
            # Skip download of additional client configs (remix client config in this case)
            'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
            'only_matching': True,
            'params': {
                'extractor_args': {'youtube': {'player_skip': ['configs']}},
            },
        }
    ]
1677
1678 @classmethod
1679 def suitable(cls, url):
1680 # Hack for lazy extractors until more generic solution is implemented
1681 # (see #28780)
1682 from .youtube import parse_qs
1683 qs = parse_qs(url)
1684 if qs.get('list', [None])[0]:
1685 return False
1686 return super(YoutubeIE, cls).suitable(url)
1687
1688 def __init__(self, *args, **kwargs):
1689 super(YoutubeIE, self).__init__(*args, **kwargs)
1690 self._code_cache = {}
1691 self._player_cache = {}
1692
1693 def _extract_player_url(self, ytcfg=None, webpage=None):
1694 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1695 if not player_url:
1696 player_url = self._search_regex(
1697 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1698 webpage, 'player URL', fatal=False)
1699 if player_url.startswith('//'):
1700 player_url = 'https:' + player_url
1701 elif not re.match(r'https?://', player_url):
1702 player_url = compat_urlparse.urljoin(
1703 'https://www.youtube.com', player_url)
1704 return player_url
1705
1706 def _signature_cache_id(self, example_sig):
1707 """ Return a string representation of a signature """
1708 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1709
1710 @classmethod
1711 def _extract_player_info(cls, player_url):
1712 for player_re in cls._PLAYER_INFO_RE:
1713 id_m = re.search(player_re, player_url)
1714 if id_m:
1715 break
1716 else:
1717 raise ExtractorError('Cannot identify player %r' % player_url)
1718 return id_m.group('id')
1719
1720 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1721 player_id = self._extract_player_info(player_url)
1722 if player_id not in self._code_cache:
1723 self._code_cache[player_id] = self._download_webpage(
1724 player_url, video_id, fatal=fatal,
1725 note='Downloading player ' + player_id,
1726 errnote='Download of %s failed' % player_url)
1727 return player_id in self._code_cache
1728
1729 def _extract_signature_function(self, video_id, player_url, example_sig):
1730 player_id = self._extract_player_info(player_url)
1731
1732 # Read from filesystem cache
1733 func_id = 'js_%s_%s' % (
1734 player_id, self._signature_cache_id(example_sig))
1735 assert os.path.basename(func_id) == func_id
1736
1737 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1738 if cache_spec is not None:
1739 return lambda s: ''.join(s[i] for i in cache_spec)
1740
1741 if self._load_player(video_id, player_url):
1742 code = self._code_cache[player_id]
1743 res = self._parse_sig_js(code)
1744
1745 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1746 cache_res = res(test_string)
1747 cache_spec = [ord(c) for c in cache_res]
1748
1749 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1750 return res
1751
1752 def _print_sig_code(self, func, example_sig):
1753 def gen_sig_code(idxs):
1754 def _genslice(start, end, step):
1755 starts = '' if start == 0 else str(start)
1756 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1757 steps = '' if step == 1 else (':%d' % step)
1758 return 's[%s%s%s]' % (starts, ends, steps)
1759
1760 step = None
1761 # Quelch pyflakes warnings - start will be set when step is set
1762 start = '(Never used)'
1763 for i, prev in zip(idxs[1:], idxs[:-1]):
1764 if step is not None:
1765 if i - prev == step:
1766 continue
1767 yield _genslice(start, prev, step)
1768 step = None
1769 continue
1770 if i - prev in [-1, 1]:
1771 step = i - prev
1772 start = prev
1773 continue
1774 else:
1775 yield 's[%d]' % prev
1776 if step is None:
1777 yield 's[%d]' % i
1778 else:
1779 yield _genslice(start, i, step)
1780
1781 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1782 cache_res = func(test_string)
1783 cache_spec = [ord(c) for c in cache_res]
1784 expr_code = ' + '.join(gen_sig_code(cache_spec))
1785 signature_id_tuple = '(%s)' % (
1786 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1787 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1788 ' return %s\n') % (signature_id_tuple, expr_code)
1789 self.to_screen('Extracted signature function:\n' + code)
1790
1791 def _parse_sig_js(self, jscode):
1792 funcname = self._search_regex(
1793 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1794 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1795 r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
1796 r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
1797 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
1798 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1799 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1800 # Obsolete patterns
1801 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1802 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1803 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1804 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1805 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1806 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1807 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1808 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1809 jscode, 'Initial JS player signature function name', group='sig')
1810
1811 jsi = JSInterpreter(jscode)
1812 initial_function = jsi.extract_function(funcname)
1813 return lambda s: initial_function([s])
1814
1815 def _decrypt_signature(self, s, video_id, player_url):
1816 """Turn the encrypted s field into a working signature"""
1817
1818 if player_url is None:
1819 raise ExtractorError('Cannot decrypt signature without player_url')
1820
1821 try:
1822 player_id = (player_url, self._signature_cache_id(s))
1823 if player_id not in self._player_cache:
1824 func = self._extract_signature_function(
1825 video_id, player_url, s
1826 )
1827 self._player_cache[player_id] = func
1828 func = self._player_cache[player_id]
1829 if self.get_param('youtube_print_sig_code'):
1830 self._print_sig_code(func, s)
1831 return func(s)
1832 except Exception as e:
1833 tb = traceback.format_exc()
1834 raise ExtractorError(
1835 'Signature extraction failed: ' + tb, cause=e)
1836
1837 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1838 """
1839 Extract signatureTimestamp (sts)
1840 Required to tell API what sig/player version is in use.
1841 """
1842 sts = None
1843 if isinstance(ytcfg, dict):
1844 sts = int_or_none(ytcfg.get('STS'))
1845
1846 if not sts:
1847 # Attempt to extract from player
1848 if player_url is None:
1849 error_msg = 'Cannot extract signature timestamp without player_url.'
1850 if fatal:
1851 raise ExtractorError(error_msg)
1852 self.report_warning(error_msg)
1853 return
1854 if self._load_player(video_id, player_url, fatal=fatal):
1855 player_id = self._extract_player_info(player_url)
1856 code = self._code_cache[player_id]
1857 sts = int_or_none(self._search_regex(
1858 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1859 'JS player signature timestamp', group='sts', fatal=fatal))
1860 return sts
1861
1862 def _mark_watched(self, video_id, player_response):
1863 playback_url = url_or_none(try_get(
1864 player_response,
1865 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
1866 if not playback_url:
1867 return
1868 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1869 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1870
1871 # cpn generation algorithm is reverse engineered from base.js.
1872 # In fact it works even with dummy cpn.
1873 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1874 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1875
1876 qs.update({
1877 'ver': ['2'],
1878 'cpn': [cpn],
1879 })
1880 playback_url = compat_urlparse.urlunparse(
1881 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1882
1883 self._download_webpage(
1884 playback_url, video_id, 'Marking watched',
1885 'Unable to mark watched', fatal=False)
1886
1887 @staticmethod
1888 def _extract_urls(webpage):
1889 # Embedded YouTube player
1890 entries = [
1891 unescapeHTML(mobj.group('url'))
1892 for mobj in re.finditer(r'''(?x)
1893 (?:
1894 <iframe[^>]+?src=|
1895 data-video-url=|
1896 <embed[^>]+?src=|
1897 embedSWF\(?:\s*|
1898 <object[^>]+data=|
1899 new\s+SWFObject\(
1900 )
1901 (["\'])
1902 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1903 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1904 \1''', webpage)]
1905
1906 # lazyYT YouTube embed
1907 entries.extend(list(map(
1908 unescapeHTML,
1909 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1910
1911 # Wordpress "YouTube Video Importer" plugin
1912 matches = re.findall(r'''(?x)<div[^>]+
1913 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1914 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1915 entries.extend(m[-1] for m in matches)
1916
1917 return entries
1918
1919 @staticmethod
1920 def _extract_url(webpage):
1921 urls = YoutubeIE._extract_urls(webpage)
1922 return urls[0] if urls else None
1923
1924 @classmethod
1925 def extract_id(cls, url):
1926 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1927 if mobj is None:
1928 raise ExtractorError('Invalid URL: %s' % url)
1929 video_id = mobj.group(2)
1930 return video_id
1931
1932 def _extract_chapters_from_json(self, data, duration):
1933 chapter_list = traverse_obj(
1934 data, (
1935 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
1936 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
1937 ), expected_type=list)
1938
1939 return self._extract_chapters(
1940 chapter_list,
1941 chapter_time=lambda chapter: float_or_none(
1942 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
1943 chapter_title=lambda chapter: traverse_obj(
1944 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
1945 duration=duration)
1946
1947 def _extract_chapters_from_engagement_panel(self, data, duration):
1948 content_list = traverse_obj(
1949 data,
1950 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
1951 expected_type=list, default=[])
1952 chapter_time = lambda chapter: parse_duration(self._get_text(chapter.get('timeDescription')))
1953 chapter_title = lambda chapter: self._get_text(chapter.get('title'))
1954
1955 return next((
1956 filter(None, (
1957 self._extract_chapters(
1958 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
1959 chapter_time, chapter_title, duration)
1960 for contents in content_list
1961 ))), [])
1962
1963 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
1964 chapters = []
1965 last_chapter = {'start_time': 0}
1966 for idx, chapter in enumerate(chapter_list or []):
1967 title = chapter_title(chapter)
1968 start_time = chapter_time(chapter)
1969 if start_time is None:
1970 continue
1971 last_chapter['end_time'] = start_time
1972 if start_time < last_chapter['start_time']:
1973 if idx == 1:
1974 chapters.pop()
1975 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
1976 else:
1977 self.report_warning(f'Invalid start time for chapter "{title}"')
1978 continue
1979 last_chapter = {'start_time': start_time, 'title': title}
1980 chapters.append(last_chapter)
1981 last_chapter['end_time'] = duration
1982 return chapters
1983
1984 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1985 return self._parse_json(self._search_regex(
1986 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1987 regex), webpage, name, default='{}'), video_id, fatal=False)
1988
1989 @staticmethod
1990 def parse_time_text(time_text):
1991 """
1992 Parse the comment time text
1993 time_text is in the format 'X units ago (edited)'
1994 """
1995 time_text_split = time_text.split(' ')
1996 if len(time_text_split) >= 3:
1997 try:
1998 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1999 except ValueError:
2000 return None
2001
2002 def _extract_comment(self, comment_renderer, parent=None):
2003 comment_id = comment_renderer.get('commentId')
2004 if not comment_id:
2005 return
2006
2007 text = self._get_text(comment_renderer.get('contentText'))
2008
2009 # note: timestamp is an estimate calculated from the current time and time_text
2010 time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
2011 time_text_dt = self.parse_time_text(time_text)
2012 if isinstance(time_text_dt, datetime.datetime):
2013 timestamp = calendar.timegm(time_text_dt.timetuple())
2014 author = self._get_text(comment_renderer.get('authorText'))
2015 author_id = try_get(comment_renderer,
2016 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2017
2018 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2019 lambda x: x['likeCount']), compat_str)) or 0
2020 author_thumbnail = try_get(comment_renderer,
2021 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2022
2023 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2024 is_favorited = 'creatorHeart' in (try_get(
2025 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2026 return {
2027 'id': comment_id,
2028 'text': text,
2029 'timestamp': timestamp,
2030 'time_text': time_text,
2031 'like_count': votes,
2032 'is_favorited': is_favorited,
2033 'author': author,
2034 'author_id': author_id,
2035 'author_thumbnail': author_thumbnail,
2036 'author_is_uploader': author_is_uploader,
2037 'parent': parent or 'root'
2038 }
2039
2040 def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
2041 ytcfg, video_id, parent=None, comment_counts=None):
2042
2043 def extract_header(contents):
2044 _total_comments = 0
2045 _continuation = None
2046 for content in contents:
2047 comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
2048 expected_comment_count = parse_count(self._get_text(
2049 comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))
2050
2051 if expected_comment_count:
2052 comment_counts[1] = expected_comment_count
2053 self.to_screen('Downloading ~%d comments' % expected_comment_count)
2054 _total_comments = comment_counts[1]
2055 sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
2056 comment_sort_index = int(sort_mode_str != 'top') # 1 = new, 0 = top
2057
2058 sort_menu_item = try_get(
2059 comments_header_renderer,
2060 lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2061 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2062
2063 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2064 if not _continuation:
2065 continue
2066
2067 sort_text = sort_menu_item.get('title')
2068 if isinstance(sort_text, compat_str):
2069 sort_text = sort_text.lower()
2070 else:
2071 sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2072 self.to_screen('Sorting comments by %s' % sort_text)
2073 break
2074 return _total_comments, _continuation
2075
2076 def extract_thread(contents):
2077 if not parent:
2078 comment_counts[2] = 0
2079 for content in contents:
2080 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2081 comment_renderer = try_get(
2082 comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
2083 content, (lambda x: x['commentRenderer'], dict))
2084
2085 if not comment_renderer:
2086 continue
2087 comment = self._extract_comment(comment_renderer, parent)
2088 if not comment:
2089 continue
2090 comment_counts[0] += 1
2091 yield comment
2092 # Attempt to get the replies
2093 comment_replies_renderer = try_get(
2094 comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2095
2096 if comment_replies_renderer:
2097 comment_counts[2] += 1
2098 comment_entries_iter = self._comment_entries(
2099 comment_replies_renderer, identity_token, account_syncid, ytcfg,
2100 video_id, parent=comment.get('id'), comment_counts=comment_counts)
2101
2102 for reply_comment in comment_entries_iter:
2103 yield reply_comment
2104
2105 # YouTube comments have a max depth of 2
2106 max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
2107 if max_depth == 1 and parent:
2108 return
2109 if not comment_counts:
2110 # comment so far, est. total comments, current comment thread #
2111 comment_counts = [0, 0, 0]
2112
2113 continuation = self._extract_continuation(root_continuation_data)
2114 if continuation and len(continuation['continuation']) < 27:
2115 self.write_debug('Detected old API continuation token. Generating new API compatible token.')
2116 continuation_token = self._generate_comment_continuation(video_id)
2117 continuation = self._build_api_continuation_query(continuation_token, None)
2118
2119 visitor_data = None
2120 is_first_continuation = parent is None
2121
2122 for page_num in itertools.count(0):
2123 if not continuation:
2124 break
2125 headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
2126 comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
2127 if page_num == 0:
2128 if is_first_continuation:
2129 note_prefix = 'Downloading comment section API JSON'
2130 else:
2131 note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
2132 comment_counts[2], comment_prog_str)
2133 else:
2134 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2135 ' ' if parent else '', ' replies' if parent else '',
2136 page_num, comment_prog_str)
2137
2138 response = self._extract_response(
2139 item_id=None, query=continuation,
2140 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2141 check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
2142 if not response:
2143 break
2144 visitor_data = try_get(
2145 response,
2146 lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
2147 compat_str) or visitor_data
2148
2149 continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))
2150
2151 continuation = None
2152 if isinstance(continuation_contents, list):
2153 for continuation_section in continuation_contents:
2154 if not isinstance(continuation_section, dict):
2155 continue
2156 continuation_items = try_get(
2157 continuation_section,
2158 (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
2159 lambda x: x['appendContinuationItemsAction']['continuationItems']),
2160 list) or []
2161 if is_first_continuation:
2162 total_comments, continuation = extract_header(continuation_items)
2163 if total_comments:
2164 yield total_comments
2165 is_first_continuation = False
2166 if continuation:
2167 break
2168 continue
2169 count = 0
2170 for count, entry in enumerate(extract_thread(continuation_items)):
2171 yield entry
2172 continuation = self._extract_continuation({'contents': continuation_items})
2173 if continuation:
2174 # Sometimes YouTube provides a continuation without any comments
2175 # In most cases we end up just downloading these with very little comments to come.
2176 if count == 0:
2177 if not parent:
2178 self.report_warning('No comments received - assuming end of comments')
2179 continuation = None
2180 break
2181
2182 # Deprecated response structure
2183 elif isinstance(continuation_contents, dict):
2184 known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
2185 for key, continuation_renderer in continuation_contents.items():
2186 if key not in known_continuation_renderers:
2187 continue
2188 if not isinstance(continuation_renderer, dict):
2189 continue
2190 if is_first_continuation:
2191 header_continuation_items = [continuation_renderer.get('header') or {}]
2192 total_comments, continuation = extract_header(header_continuation_items)
2193 if total_comments:
2194 yield total_comments
2195 is_first_continuation = False
2196 if continuation:
2197 break
2198
2199 # Sometimes YouTube provides a continuation without any comments
2200 # In most cases we end up just downloading these with very little comments to come.
2201 count = 0
2202 for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
2203 yield entry
2204 continuation = self._extract_continuation(continuation_renderer)
2205 if count == 0:
2206 if not parent:
2207 self.report_warning('No comments received - assuming end of comments')
2208 continuation = None
2209 break
2210
2211 @staticmethod
2212 def _generate_comment_continuation(video_id):
2213 """
2214 Generates initial comment section continuation token from given video id
2215 """
2216 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2217 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2218 new_continuation_intlist = list(itertools.chain.from_iterable(
2219 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2220 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2221
2222 def _extract_comments(self, ytcfg, video_id, contents, webpage):
2223 """Entry for comment extraction"""
2224 def _real_comment_extract(contents):
2225 if isinstance(contents, list):
2226 for entry in contents:
2227 for key, renderer in entry.items():
2228 if key not in known_entry_comment_renderers:
2229 continue
2230 yield from self._comment_entries(
2231 renderer, video_id=video_id, ytcfg=ytcfg,
2232 identity_token=self._extract_identity_token(webpage, item_id=video_id),
2233 account_syncid=self._extract_account_syncid(ytcfg))
2234 break
2235 comments = []
2236 known_entry_comment_renderers = ('itemSectionRenderer',)
2237 estimated_total = 0
2238 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
2239
2240 try:
2241 for comment in _real_comment_extract(contents):
2242 if len(comments) >= max_comments:
2243 break
2244 if isinstance(comment, int):
2245 estimated_total = comment
2246 continue
2247 comments.append(comment)
2248 except KeyboardInterrupt:
2249 self.to_screen('Interrupted by user')
2250 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
2251 return {
2252 'comments': comments,
2253 'comment_count': len(comments),
2254 }
2255
2256 @staticmethod
2257 def _generate_player_context(sts=None):
2258 context = {
2259 'html5Preference': 'HTML5_PREF_WANTS',
2260 }
2261 if sts is not None:
2262 context['signatureTimestamp'] = sts
2263 return {
2264 'playbackContext': {
2265 'contentPlaybackContext': context
2266 },
2267 'contentCheckOk': True
2268 }
2269
2270 @staticmethod
2271 def _get_video_info_params(video_id, client='TVHTML5'):
2272 GVI_CLIENTS = {
2273 'ANDROID': {
2274 'c': 'ANDROID',
2275 'cver': '16.20',
2276 },
2277 'TVHTML5': {
2278 'c': 'TVHTML5',
2279 'cver': '6.20180913',
2280 }
2281 }
2282 query = {
2283 'video_id': video_id,
2284 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2285 'html5': '1'
2286 }
2287 query.update(GVI_CLIENTS.get(client))
2288 return query
2289
2290 def _real_extract(self, url):
2291 url, smuggled_data = unsmuggle_url(url, {})
2292 video_id = self._match_id(url)
2293
2294 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2295
2296 base_url = self.http_scheme() + '//www.youtube.com/'
2297 webpage_url = base_url + 'watch?v=' + video_id
2298 webpage = self._download_webpage(
2299 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2300
2301 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2302 identity_token = self._extract_identity_token(webpage, video_id)
2303 session_index = self._extract_session_index(ytcfg)
2304 player_url = self._extract_player_url(ytcfg, webpage)
2305
2306 player_client = self._configuration_arg('player_client', [''])[0]
2307 if player_client not in ('web', 'android', ''):
2308 self.report_warning(f'Invalid player_client {player_client} given. Falling back to android client.')
2309 force_mobile_client = player_client != 'web'
2310 player_skip = self._configuration_arg('player_skip')
2311 player_response = None
2312 if webpage:
2313 player_response = self._extract_yt_initial_variable(
2314 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2315 video_id, 'initial player response')
2316
2317 syncid = self._extract_account_syncid(ytcfg, player_response)
2318 headers = self._generate_api_headers(ytcfg, identity_token, syncid, session_index=session_index)
2319
2320 ytm_streaming_data = {}
2321 if is_music_url:
2322 ytm_webpage = None
2323 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2324 if sts and not force_mobile_client and 'configs' not in player_skip:
2325 ytm_webpage = self._download_webpage(
2326 'https://music.youtube.com',
2327 video_id, fatal=False, note='Downloading remix client config')
2328
2329 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2330 ytm_client = 'WEB_REMIX'
2331 if not sts or force_mobile_client:
2332 # Android client already has signature descrambled
2333 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2334 if not sts:
2335 self.report_warning('Falling back to android remix client for player API.')
2336 ytm_client = 'ANDROID_MUSIC'
2337 ytm_cfg = {}
2338
2339 ytm_headers = self._generate_api_headers(
2340 ytm_cfg, identity_token, syncid,
2341 client=ytm_client, session_index=session_index)
2342 ytm_query = {'videoId': video_id}
2343 ytm_query.update(self._generate_player_context(sts))
2344
2345 ytm_player_response = self._extract_response(
2346 item_id=video_id, ep='player', query=ytm_query,
2347 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2348 default_client=ytm_client,
2349 note='Downloading %sremix player API JSON' % ('android ' if force_mobile_client else ''))
2350 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}
2351
2352 if not player_response or force_mobile_client:
2353 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2354 yt_client = 'WEB'
2355 ytpcfg = ytcfg
2356 ytp_headers = headers
2357 if not sts or force_mobile_client:
2358 # Android client already has signature descrambled
2359 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2360 if not sts:
2361 self.report_warning('Falling back to android client for player API.')
2362 yt_client = 'ANDROID'
2363 ytpcfg = {}
2364 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid,
2365 client=yt_client, session_index=session_index)
2366
2367 yt_query = {'videoId': video_id}
2368 yt_query.update(self._generate_player_context(sts))
2369 player_response = self._extract_response(
2370 item_id=video_id, ep='player', query=yt_query,
2371 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2372 default_client=yt_client,
2373 note='Downloading %splayer API JSON' % ('android ' if force_mobile_client else '')
2374 ) or player_response
2375
2376 # Age-gate workarounds
2377 playability_status = player_response.get('playabilityStatus') or {}
2378 if playability_status.get('reason') in self._AGE_GATE_REASONS:
2379 gvi_clients = ('ANDROID', 'TVHTML5') if force_mobile_client else ('TVHTML5', 'ANDROID')
2380 for gvi_client in gvi_clients:
2381 pr = self._parse_json(try_get(compat_parse_qs(
2382 self._download_webpage(
2383 base_url + 'get_video_info', video_id,
2384 'Refetching age-gated %s info webpage' % gvi_client.lower(),
2385 'unable to download video info webpage', fatal=False,
2386 query=self._get_video_info_params(video_id, client=gvi_client))),
2387 lambda x: x['player_response'][0],
2388 compat_str) or '{}', video_id)
2389 if pr:
2390 break
2391 if not pr:
2392 self.report_warning('Falling back to embedded-only age-gate workaround.')
2393 embed_webpage = None
2394 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2395 if sts and not force_mobile_client and 'configs' not in player_skip:
2396 embed_webpage = self._download_webpage(
2397 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2398 video_id=video_id, note='Downloading age-gated embed config')
2399
2400 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2401 # If we extracted the embed webpage, it'll tell us if we can view the video
2402 embedded_pr = self._parse_json(
2403 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2404 video_id=video_id)
2405 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2406 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2407 yt_client = 'WEB_EMBEDDED_PLAYER'
2408 if not sts or force_mobile_client:
2409 # Android client already has signature descrambled
2410 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2411 if not sts:
2412 self.report_warning(
2413 'Falling back to android embedded client for player API (note: some formats may be missing).')
2414 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2415 ytcfg_age = {}
2416
2417 ytage_headers = self._generate_api_headers(
2418 ytcfg_age, identity_token, syncid,
2419 client=yt_client, session_index=session_index)
2420 yt_age_query = {'videoId': video_id}
2421 yt_age_query.update(self._generate_player_context(sts))
2422 pr = self._extract_response(
2423 item_id=video_id, ep='player', query=yt_age_query,
2424 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2425 default_client=yt_client,
2426 note='Downloading %sage-gated player API JSON' % ('android ' if force_mobile_client else '')
2427 ) or {}
2428
2429 if pr:
2430 player_response = pr
2431
2432 trailer_video_id = try_get(
2433 playability_status,
2434 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2435 compat_str)
2436 if trailer_video_id:
2437 return self.url_result(
2438 trailer_video_id, self.ie_key(), trailer_video_id)
2439
2440 search_meta = (
2441 lambda x: self._html_search_meta(x, webpage, default=None)) \
2442 if webpage else lambda x: None
2443
2444 video_details = player_response.get('videoDetails') or {}
2445 microformat = try_get(
2446 player_response,
2447 lambda x: x['microformat']['playerMicroformatRenderer'],
2448 dict) or {}
2449 video_title = video_details.get('title') \
2450 or self._get_text(microformat.get('title')) \
2451 or search_meta(['og:title', 'twitter:title', 'title'])
2452 video_description = video_details.get('shortDescription')
2453
2454 if not smuggled_data.get('force_singlefeed', False):
2455 if not self.get_param('noplaylist'):
2456 multifeed_metadata_list = try_get(
2457 player_response,
2458 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2459 compat_str)
2460 if multifeed_metadata_list:
2461 entries = []
2462 feed_ids = []
2463 for feed in multifeed_metadata_list.split(','):
2464 # Unquote should take place before split on comma (,) since textual
2465 # fields may contain comma as well (see
2466 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2467 feed_data = compat_parse_qs(
2468 compat_urllib_parse_unquote_plus(feed))
2469
2470 def feed_entry(name):
2471 return try_get(
2472 feed_data, lambda x: x[name][0], compat_str)
2473
2474 feed_id = feed_entry('id')
2475 if not feed_id:
2476 continue
2477 feed_title = feed_entry('title')
2478 title = video_title
2479 if feed_title:
2480 title += ' (%s)' % feed_title
2481 entries.append({
2482 '_type': 'url_transparent',
2483 'ie_key': 'Youtube',
2484 'url': smuggle_url(
2485 base_url + 'watch?v=' + feed_data['id'][0],
2486 {'force_singlefeed': True}),
2487 'title': title,
2488 })
2489 feed_ids.append(feed_id)
2490 self.to_screen(
2491 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2492 % (', '.join(feed_ids), video_id))
2493 return self.playlist_result(
2494 entries, video_id, video_title, video_description)
2495 else:
2496 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2497
2498 formats, itags, stream_ids = [], [], []
2499 itag_qualities = {}
2500 q = qualities([
2501 # "tiny" is the smallest video-only format. But some audio-only formats
2502 # was also labeled "tiny". It is not clear if such formats still exist
2503 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2504 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2505 ])
2506
2507 streaming_data = player_response.get('streamingData') or {}
2508 streaming_formats = streaming_data.get('formats') or []
2509 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
2510 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2511 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2512
2513 for fmt in streaming_formats:
2514 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2515 continue
2516
2517 itag = str_or_none(fmt.get('itag'))
2518 audio_track = fmt.get('audioTrack') or {}
2519 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2520 if stream_id in stream_ids:
2521 continue
2522
2523 quality = fmt.get('quality')
2524 if quality == 'tiny' or not quality:
2525 quality = fmt.get('audioQuality', '').lower() or quality
2526 if itag and quality:
2527 itag_qualities[itag] = quality
2528 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2529 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2530 # number of fragment that would subsequently requested with (`&sq=N`)
2531 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2532 continue
2533
2534 fmt_url = fmt.get('url')
2535 if not fmt_url:
2536 sc = compat_parse_qs(fmt.get('signatureCipher'))
2537 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2538 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2539 if not (sc and fmt_url and encrypted_sig):
2540 continue
2541 if not player_url:
2542 continue
2543 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2544 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2545 fmt_url += '&' + sp + '=' + signature
2546
2547 if itag:
2548 itags.append(itag)
2549 stream_ids.append(stream_id)
2550
2551 tbr = float_or_none(
2552 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2553 dct = {
2554 'asr': int_or_none(fmt.get('audioSampleRate')),
2555 'filesize': int_or_none(fmt.get('contentLength')),
2556 'format_id': itag,
2557 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
2558 'fps': int_or_none(fmt.get('fps')),
2559 'height': int_or_none(fmt.get('height')),
2560 'quality': q(quality),
2561 'tbr': tbr,
2562 'url': fmt_url,
2563 'width': fmt.get('width'),
2564 'language': audio_track.get('id', '').split('.')[0],
2565 }
2566 mime_mobj = re.match(
2567 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2568 if mime_mobj:
2569 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2570 dct.update(parse_codecs(mime_mobj.group(2)))
2571 # The 3gp format in android client has a quality of "small",
2572 # but is actually worse than all other formats
2573 if dct['ext'] == '3gp':
2574 dct['quality'] = q('tiny')
2575 no_audio = dct.get('acodec') == 'none'
2576 no_video = dct.get('vcodec') == 'none'
2577 if no_audio:
2578 dct['vbr'] = tbr
2579 if no_video:
2580 dct['abr'] = tbr
2581 if no_audio or no_video:
2582 dct['downloader_options'] = {
2583 # Youtube throttles chunks >~10M
2584 'http_chunk_size': 10485760,
2585 }
2586 if dct.get('ext'):
2587 dct['container'] = dct['ext'] + '_dash'
2588 formats.append(dct)
2589
2590 skip_manifests = self._configuration_arg('skip')
2591 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2592 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2593
2594 for sd in (streaming_data, ytm_streaming_data):
2595 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2596 if hls_manifest_url:
2597 for f in self._extract_m3u8_formats(
2598 hls_manifest_url, video_id, 'mp4', fatal=False):
2599 itag = self._search_regex(
2600 r'/itag/(\d+)', f['url'], 'itag', default=None)
2601 if itag:
2602 f['format_id'] = itag
2603 formats.append(f)
2604
2605 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2606 if dash_manifest_url:
2607 for f in self._extract_mpd_formats(
2608 dash_manifest_url, video_id, fatal=False):
2609 itag = f['format_id']
2610 if itag in itags:
2611 continue
2612 if itag in itag_qualities:
2613 f['quality'] = q(itag_qualities[itag])
2614 filesize = int_or_none(self._search_regex(
2615 r'/clen/(\d+)', f.get('fragment_base_url')
2616 or f['url'], 'file size', default=None))
2617 if filesize:
2618 f['filesize'] = filesize
2619 formats.append(f)
2620
2621 if not formats:
2622 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
2623 self.raise_no_formats(
2624 'This video is DRM protected.', expected=True)
2625 pemr = try_get(
2626 playability_status,
2627 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2628 dict) or {}
2629 reason = self._get_text(pemr.get('reason')) or playability_status.get('reason')
2630 subreason = pemr.get('subreason')
2631 if subreason:
2632 subreason = clean_html(self._get_text(subreason))
2633 if subreason == 'The uploader has not made this video available in your country.':
2634 countries = microformat.get('availableCountries')
2635 if not countries:
2636 regions_allowed = search_meta('regionsAllowed')
2637 countries = regions_allowed.split(',') if regions_allowed else None
2638 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2639 reason += '\n' + subreason
2640 if reason:
2641 self.raise_no_formats(reason, expected=True)
2642
2643 self._sort_formats(formats)
2644
2645 keywords = video_details.get('keywords') or []
2646 if not keywords and webpage:
2647 keywords = [
2648 unescapeHTML(m.group('content'))
2649 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2650 for keyword in keywords:
2651 if keyword.startswith('yt:stretch='):
2652 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2653 if mobj:
2654 # NB: float is intentional for forcing float division
2655 w, h = (float(v) for v in mobj.groups())
2656 if w > 0 and h > 0:
2657 ratio = w / h
2658 for f in formats:
2659 if f.get('vcodec') != 'none':
2660 f['stretched_ratio'] = ratio
2661 break
2662
2663 category = microformat.get('category') or search_meta('genre')
2664 channel_id = video_details.get('channelId') \
2665 or microformat.get('externalChannelId') \
2666 or search_meta('channelId')
2667 duration = int_or_none(
2668 video_details.get('lengthSeconds')
2669 or microformat.get('lengthSeconds')) \
2670 or parse_duration(search_meta('duration'))
2671 is_live = video_details.get('isLive')
2672 is_upcoming = video_details.get('isUpcoming')
2673 owner_profile_url = microformat.get('ownerProfileUrl')
2674
2675 thumbnails = []
2676 for container in (video_details, microformat):
2677 for thumbnail in (try_get(
2678 container,
2679 lambda x: x['thumbnail']['thumbnails'], list) or []):
2680 thumbnail_url = thumbnail.get('url')
2681 if not thumbnail_url:
2682 continue
2683 # Sometimes youtube gives a wrong thumbnail URL. See:
2684 # https://github.com/yt-dlp/yt-dlp/issues/233
2685 # https://github.com/ytdl-org/youtube-dl/issues/28023
2686 if 'maxresdefault' in thumbnail_url:
2687 thumbnail_url = thumbnail_url.split('?')[0]
2688 thumbnails.append({
2689 'url': thumbnail_url,
2690 'height': int_or_none(thumbnail.get('height')),
2691 'width': int_or_none(thumbnail.get('width')),
2692 })
2693 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2694 if thumbnail_url:
2695 thumbnails.append({
2696 'url': thumbnail_url,
2697 })
2698 # The best resolution thumbnails sometimes does not appear in the webpage
2699 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2700 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2701 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2702 guaranteed_thumbnail_names = [
2703 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2704 'mqdefault', 'mq1', 'mq2', 'mq3',
2705 'default', '1', '2', '3'
2706 ]
2707 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2708 n_thumbnail_names = len(thumbnail_names)
2709
2710 thumbnails.extend({
2711 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2712 video_id=video_id, name=name, ext=ext,
2713 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2714 '_test_url': name in hq_thumbnail_names,
2715 } for name in thumbnail_names for ext in ('webp', 'jpg'))
2716 for thumb in thumbnails:
2717 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2718 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2719 self._remove_duplicate_formats(thumbnails)
2720
2721 info = {
2722 'id': video_id,
2723 'title': self._live_title(video_title) if is_live else video_title,
2724 'formats': formats,
2725 'thumbnails': thumbnails,
2726 'description': video_description,
2727 'upload_date': unified_strdate(
2728 microformat.get('uploadDate')
2729 or search_meta('uploadDate')),
2730 'uploader': video_details['author'],
2731 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2732 'uploader_url': owner_profile_url,
2733 'channel_id': channel_id,
2734 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2735 'duration': duration,
2736 'view_count': int_or_none(
2737 video_details.get('viewCount')
2738 or microformat.get('viewCount')
2739 or search_meta('interactionCount')),
2740 'average_rating': float_or_none(video_details.get('averageRating')),
2741 'age_limit': 18 if (
2742 microformat.get('isFamilySafe') is False
2743 or search_meta('isFamilyFriendly') == 'false'
2744 or search_meta('og:restrictions:age') == '18+') else 0,
2745 'webpage_url': webpage_url,
2746 'categories': [category] if category else None,
2747 'tags': keywords,
2748 'is_live': is_live,
2749 'playable_in_embed': playability_status.get('playableInEmbed'),
2750 'was_live': video_details.get('isLiveContent'),
2751 }
2752
2753 pctr = try_get(
2754 player_response,
2755 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2756 subtitles = {}
2757 if pctr:
2758 def process_language(container, base_url, lang_code, sub_name, query):
2759 lang_subs = container.setdefault(lang_code, [])
2760 for fmt in self._SUBTITLE_FORMATS:
2761 query.update({
2762 'fmt': fmt,
2763 })
2764 lang_subs.append({
2765 'ext': fmt,
2766 'url': update_url_query(base_url, query),
2767 'name': sub_name,
2768 })
2769
2770 for caption_track in (pctr.get('captionTracks') or []):
2771 base_url = caption_track.get('baseUrl')
2772 if not base_url:
2773 continue
2774 if caption_track.get('kind') != 'asr':
2775 lang_code = (
2776 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2777 or caption_track.get('languageCode'))
2778 if not lang_code:
2779 continue
2780 process_language(
2781 subtitles, base_url, lang_code,
2782 try_get(caption_track, lambda x: x['name']['simpleText']),
2783 {})
2784 continue
2785 automatic_captions = {}
2786 for translation_language in (pctr.get('translationLanguages') or []):
2787 translation_language_code = translation_language.get('languageCode')
2788 if not translation_language_code:
2789 continue
2790 process_language(
2791 automatic_captions, base_url, translation_language_code,
2792 self._get_text(translation_language.get('languageName'), max_runs=1),
2793 {'tlang': translation_language_code})
2794 info['automatic_captions'] = automatic_captions
2795 info['subtitles'] = subtitles
2796
2797 parsed_url = compat_urllib_parse_urlparse(url)
2798 for component in [parsed_url.fragment, parsed_url.query]:
2799 query = compat_parse_qs(component)
2800 for k, v in query.items():
2801 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2802 d_k += '_time'
2803 if d_k not in info and k in s_ks:
2804 info[d_k] = parse_duration(query[k][0])
2805
2806 # Youtube Music Auto-generated description
2807 if video_description:
2808 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2809 if mobj:
2810 release_year = mobj.group('release_year')
2811 release_date = mobj.group('release_date')
2812 if release_date:
2813 release_date = release_date.replace('-', '')
2814 if not release_year:
2815 release_year = release_date[:4]
2816 info.update({
2817 'album': mobj.group('album'.strip()),
2818 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2819 'track': mobj.group('track').strip(),
2820 'release_date': release_date,
2821 'release_year': int_or_none(release_year),
2822 })
2823
2824 initial_data = None
2825 if webpage:
2826 initial_data = self._extract_yt_initial_variable(
2827 webpage, self._YT_INITIAL_DATA_RE, video_id,
2828 'yt initial data')
2829 if not initial_data:
2830 initial_data = self._extract_response(
2831 item_id=video_id, ep='next', fatal=False,
2832 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2833 note='Downloading initial data API JSON')
2834
2835 try:
2836 # This will error if there is no livechat
2837 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2838 info['subtitles']['live_chat'] = [{
2839 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2840 'video_id': video_id,
2841 'ext': 'json',
2842 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2843 }]
2844 except (KeyError, IndexError, TypeError):
2845 pass
2846
2847 if initial_data:
2848 info['chapters'] = (
2849 self._extract_chapters_from_json(initial_data, duration)
2850 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2851 or None)
2852
2853 contents = try_get(
2854 initial_data,
2855 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2856 list) or []
2857 for content in contents:
2858 vpir = content.get('videoPrimaryInfoRenderer')
2859 if vpir:
2860 stl = vpir.get('superTitleLink')
2861 if stl:
2862 stl = self._get_text(stl)
2863 if try_get(
2864 vpir,
2865 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2866 info['location'] = stl
2867 else:
2868 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2869 if mobj:
2870 info.update({
2871 'series': mobj.group(1),
2872 'season_number': int(mobj.group(2)),
2873 'episode_number': int(mobj.group(3)),
2874 })
2875 for tlb in (try_get(
2876 vpir,
2877 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2878 list) or []):
2879 tbr = tlb.get('toggleButtonRenderer') or {}
2880 for getter, regex in [(
2881 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2882 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2883 lambda x: x['accessibility'],
2884 lambda x: x['accessibilityData']['accessibilityData'],
2885 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2886 label = (try_get(tbr, getter, dict) or {}).get('label')
2887 if label:
2888 mobj = re.match(regex, label)
2889 if mobj:
2890 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2891 break
2892 sbr_tooltip = try_get(
2893 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2894 if sbr_tooltip:
2895 like_count, dislike_count = sbr_tooltip.split(' / ')
2896 info.update({
2897 'like_count': str_to_int(like_count),
2898 'dislike_count': str_to_int(dislike_count),
2899 })
2900 vsir = content.get('videoSecondaryInfoRenderer')
2901 if vsir:
2902 info['channel'] = self._get_text(try_get(
2903 vsir,
2904 lambda x: x['owner']['videoOwnerRenderer']['title'],
2905 dict))
2906 rows = try_get(
2907 vsir,
2908 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2909 list) or []
2910 multiple_songs = False
2911 for row in rows:
2912 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2913 multiple_songs = True
2914 break
2915 for row in rows:
2916 mrr = row.get('metadataRowRenderer') or {}
2917 mrr_title = mrr.get('title')
2918 if not mrr_title:
2919 continue
2920 mrr_title = self._get_text(mrr['title'])
2921 mrr_contents_text = self._get_text(mrr['contents'][0])
2922 if mrr_title == 'License':
2923 info['license'] = mrr_contents_text
2924 elif not multiple_songs:
2925 if mrr_title == 'Album':
2926 info['album'] = mrr_contents_text
2927 elif mrr_title == 'Artist':
2928 info['artist'] = mrr_contents_text
2929 elif mrr_title == 'Song':
2930 info['track'] = mrr_contents_text
2931
2932 fallbacks = {
2933 'channel': 'uploader',
2934 'channel_id': 'uploader_id',
2935 'channel_url': 'uploader_url',
2936 }
2937 for to, frm in fallbacks.items():
2938 if not info.get(to):
2939 info[to] = info.get(frm)
2940
2941 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2942 v = info.get(s_k)
2943 if v:
2944 info[d_k] = v
2945
2946 is_private = bool_or_none(video_details.get('isPrivate'))
2947 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2948 is_membersonly = None
2949 is_premium = None
2950 if initial_data and is_private is not None:
2951 is_membersonly = False
2952 is_premium = False
2953 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2954 badge_labels = set()
2955 for content in contents:
2956 if not isinstance(content, dict):
2957 continue
2958 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
2959 for badge_label in badge_labels:
2960 if badge_label.lower() == 'members only':
2961 is_membersonly = True
2962 elif badge_label.lower() == 'premium':
2963 is_premium = True
2964 elif badge_label.lower() == 'unlisted':
2965 is_unlisted = True
2966
2967 info['availability'] = self._availability(
2968 is_private=is_private,
2969 needs_premium=is_premium,
2970 needs_subscription=is_membersonly,
2971 needs_auth=info['age_limit'] >= 18,
2972 is_unlisted=None if is_private is None else is_unlisted)
2973
2974 # get xsrf for annotations or comments
2975 get_annotations = self.get_param('writeannotations', False)
2976 get_comments = self.get_param('getcomments', False)
2977 if get_annotations or get_comments:
2978 xsrf_token = None
2979 ytcfg = self._extract_ytcfg(video_id, webpage)
2980 if ytcfg:
2981 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2982 if not xsrf_token:
2983 xsrf_token = self._search_regex(
2984 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2985 webpage, 'xsrf token', group='xsrf_token', fatal=False)
2986
2987 # annotations
2988 if get_annotations:
2989 invideo_url = try_get(
2990 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2991 if xsrf_token and invideo_url:
2992 xsrf_field_name = None
2993 if ytcfg:
2994 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2995 if not xsrf_field_name:
2996 xsrf_field_name = self._search_regex(
2997 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2998 webpage, 'xsrf field name',
2999 group='xsrf_field_name', default='session_token')
3000 info['annotations'] = self._download_webpage(
3001 self._proto_relative_url(invideo_url),
3002 video_id, note='Downloading annotations',
3003 errnote='Unable to download video annotations', fatal=False,
3004 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3005
3006 if get_comments:
3007 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)
3008
3009 self.mark_watched(video_id, player_response)
3010
3011 return info
3012
3013
3014 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3015 IE_DESC = 'YouTube.com tab'
3016 _VALID_URL = r'''(?x)
3017 https?://
3018 (?:\w+\.)?
3019 (?:
3020 youtube(?:kids)?\.com|
3021 invidio\.us
3022 )/
3023 (?:
3024 (?P<channel_type>channel|c|user|browse)/|
3025 (?P<not_channel>
3026 feed/|hashtag/|
3027 (?:playlist|watch)\?.*?\blist=
3028 )|
3029 (?!(?:%s)\b) # Direct URLs
3030 )
3031 (?P<id>[^/?\#&]+)
3032 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3033 IE_NAME = 'youtube:tab'
3034
3035 _TESTS = [{
3036 'note': 'playlists, multipage',
3037 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3038 'playlist_mincount': 94,
3039 'info_dict': {
3040 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3041 'title': 'Игорь Клейнер - Playlists',
3042 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3043 'uploader': 'Игорь Клейнер',
3044 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3045 },
3046 }, {
3047 'note': 'playlists, multipage, different order',
3048 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3049 'playlist_mincount': 94,
3050 'info_dict': {
3051 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3052 'title': 'Игорь Клейнер - Playlists',
3053 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3054 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3055 'uploader': 'Игорь Клейнер',
3056 },
3057 }, {
3058 'note': 'playlists, series',
3059 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3060 'playlist_mincount': 5,
3061 'info_dict': {
3062 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3063 'title': '3Blue1Brown - Playlists',
3064 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3065 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3066 'uploader': '3Blue1Brown',
3067 },
3068 }, {
3069 'note': 'playlists, singlepage',
3070 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3071 'playlist_mincount': 4,
3072 'info_dict': {
3073 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3074 'title': 'ThirstForScience - Playlists',
3075 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3076 'uploader': 'ThirstForScience',
3077 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3078 }
3079 }, {
3080 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3081 'only_matching': True,
3082 }, {
3083 'note': 'basic, single video playlist',
3084 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3085 'info_dict': {
3086 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3087 'uploader': 'Sergey M.',
3088 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3089 'title': 'youtube-dl public playlist',
3090 },
3091 'playlist_count': 1,
3092 }, {
3093 'note': 'empty playlist',
3094 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3095 'info_dict': {
3096 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3097 'uploader': 'Sergey M.',
3098 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3099 'title': 'youtube-dl empty playlist',
3100 },
3101 'playlist_count': 0,
3102 }, {
3103 'note': 'Home tab',
3104 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3105 'info_dict': {
3106 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3107 'title': 'lex will - Home',
3108 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3109 'uploader': 'lex will',
3110 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3111 },
3112 'playlist_mincount': 2,
3113 }, {
3114 'note': 'Videos tab',
3115 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3116 'info_dict': {
3117 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3118 'title': 'lex will - Videos',
3119 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3120 'uploader': 'lex will',
3121 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3122 },
3123 'playlist_mincount': 975,
3124 }, {
3125 'note': 'Videos tab, sorted by popular',
3126 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3127 'info_dict': {
3128 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3129 'title': 'lex will - Videos',
3130 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3131 'uploader': 'lex will',
3132 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3133 },
3134 'playlist_mincount': 199,
3135 }, {
3136 'note': 'Playlists tab',
3137 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3138 'info_dict': {
3139 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3140 'title': 'lex will - Playlists',
3141 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3142 'uploader': 'lex will',
3143 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3144 },
3145 'playlist_mincount': 17,
3146 }, {
3147 'note': 'Community tab',
3148 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3149 'info_dict': {
3150 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3151 'title': 'lex will - Community',
3152 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3153 'uploader': 'lex will',
3154 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3155 },
3156 'playlist_mincount': 18,
3157 }, {
3158 'note': 'Channels tab',
3159 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3160 'info_dict': {
3161 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3162 'title': 'lex will - Channels',
3163 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3164 'uploader': 'lex will',
3165 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3166 },
3167 'playlist_mincount': 12,
3168 }, {
3169 'note': 'Search tab',
3170 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3171 'playlist_mincount': 40,
3172 'info_dict': {
3173 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3174 'title': '3Blue1Brown - Search - linear algebra',
3175 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3176 'uploader': '3Blue1Brown',
3177 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3178 },
3179 }, {
3180 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3181 'only_matching': True,
3182 }, {
3183 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3184 'only_matching': True,
3185 }, {
3186 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3187 'only_matching': True,
3188 }, {
3189 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3190 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3191 'info_dict': {
3192 'title': '29C3: Not my department',
3193 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3194 'uploader': 'Christiaan008',
3195 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3196 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3197 },
3198 'playlist_count': 96,
3199 }, {
3200 'note': 'Large playlist',
3201 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3202 'info_dict': {
3203 'title': 'Uploads from Cauchemar',
3204 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3205 'uploader': 'Cauchemar',
3206 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3207 },
3208 'playlist_mincount': 1123,
3209 }, {
3210 'note': 'even larger playlist, 8832 videos',
3211 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3212 'only_matching': True,
3213 }, {
3214 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3215 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3216 'info_dict': {
3217 'title': 'Uploads from Interstellar Movie',
3218 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3219 'uploader': 'Interstellar Movie',
3220 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3221 },
3222 'playlist_mincount': 21,
3223 }, {
3224 'note': 'Playlist with "show unavailable videos" button',
3225 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3226 'info_dict': {
3227 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3228 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3229 'uploader': 'Phim Siêu Nhân Nhật Bản',
3230 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3231 },
3232 'playlist_mincount': 200,
3233 }, {
3234 'note': 'Playlist with unavailable videos in page 7',
3235 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3236 'info_dict': {
3237 'title': 'Uploads from BlankTV',
3238 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3239 'uploader': 'BlankTV',
3240 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3241 },
3242 'playlist_mincount': 1000,
3243 }, {
3244 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3245 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3246 'info_dict': {
3247 'title': 'Data Analysis with Dr Mike Pound',
3248 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3249 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3250 'uploader': 'Computerphile',
3251 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3252 },
3253 'playlist_mincount': 11,
3254 }, {
3255 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3256 'only_matching': True,
3257 }, {
3258 'note': 'Playlist URL that does not actually serve a playlist',
3259 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3260 'info_dict': {
3261 'id': 'FqZTN594JQw',
3262 'ext': 'webm',
3263 'title': "Smiley's People 01 detective, Adventure Series, Action",
3264 'uploader': 'STREEM',
3265 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3266 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3267 'upload_date': '20150526',
3268 'license': 'Standard YouTube License',
3269 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3270 'categories': ['People & Blogs'],
3271 'tags': list,
3272 'view_count': int,
3273 'like_count': int,
3274 'dislike_count': int,
3275 },
3276 'params': {
3277 'skip_download': True,
3278 },
3279 'skip': 'This video is not available.',
3280 'add_ie': [YoutubeIE.ie_key()],
3281 }, {
3282 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3283 'only_matching': True,
3284 }, {
3285 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3286 'only_matching': True,
3287 }, {
3288 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3289 'info_dict': {
3290 'id': 'X1whbWASnNQ', # This will keep changing
3291 'ext': 'mp4',
3292 'title': compat_str,
3293 'uploader': 'Sky News',
3294 'uploader_id': 'skynews',
3295 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3296 'upload_date': r're:\d{8}',
3297 'description': compat_str,
3298 'categories': ['News & Politics'],
3299 'tags': list,
3300 'like_count': int,
3301 'dislike_count': int,
3302 },
3303 'params': {
3304 'skip_download': True,
3305 },
3306 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3307 }, {
3308 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3309 'info_dict': {
3310 'id': 'a48o2S1cPoo',
3311 'ext': 'mp4',
3312 'title': 'The Young Turks - Live Main Show',
3313 'uploader': 'The Young Turks',
3314 'uploader_id': 'TheYoungTurks',
3315 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3316 'upload_date': '20150715',
3317 'license': 'Standard YouTube License',
3318 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3319 'categories': ['News & Politics'],
3320 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3321 'like_count': int,
3322 'dislike_count': int,
3323 },
3324 'params': {
3325 'skip_download': True,
3326 },
3327 'only_matching': True,
3328 }, {
3329 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3330 'only_matching': True,
3331 }, {
3332 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3333 'only_matching': True,
3334 }, {
3335 'note': 'A channel that is not live. Should raise error',
3336 'url': 'https://www.youtube.com/user/numberphile/live',
3337 'only_matching': True,
3338 }, {
3339 'url': 'https://www.youtube.com/feed/trending',
3340 'only_matching': True,
3341 }, {
3342 'url': 'https://www.youtube.com/feed/library',
3343 'only_matching': True,
3344 }, {
3345 'url': 'https://www.youtube.com/feed/history',
3346 'only_matching': True,
3347 }, {
3348 'url': 'https://www.youtube.com/feed/subscriptions',
3349 'only_matching': True,
3350 }, {
3351 'url': 'https://www.youtube.com/feed/watch_later',
3352 'only_matching': True,
3353 }, {
3354 'note': 'Recommended - redirects to home page',
3355 'url': 'https://www.youtube.com/feed/recommended',
3356 'only_matching': True,
3357 }, {
3358 'note': 'inline playlist with not always working continuations',
3359 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3360 'only_matching': True,
3361 }, {
3362 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3363 'only_matching': True,
3364 }, {
3365 'url': 'https://www.youtube.com/course',
3366 'only_matching': True,
3367 }, {
3368 'url': 'https://www.youtube.com/zsecurity',
3369 'only_matching': True,
3370 }, {
3371 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3372 'only_matching': True,
3373 }, {
3374 'url': 'https://www.youtube.com/TheYoungTurks/live',
3375 'only_matching': True,
3376 }, {
3377 'url': 'https://www.youtube.com/hashtag/cctv9',
3378 'info_dict': {
3379 'id': 'cctv9',
3380 'title': '#cctv9',
3381 },
3382 'playlist_mincount': 350,
3383 }, {
3384 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3385 'only_matching': True,
3386 }, {
3387 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3388 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3389 'only_matching': True
3390 }, {
3391 'note': '/browse/ should redirect to /channel/',
3392 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3393 'only_matching': True
3394 }, {
3395 'note': 'VLPL, should redirect to playlist?list=PL...',
3396 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3397 'info_dict': {
3398 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3399 'uploader': 'NoCopyrightSounds',
3400 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3401 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3402 'title': 'NCS Releases',
3403 },
3404 'playlist_mincount': 166,
3405 }, {
3406 'note': 'Topic, should redirect to playlist?list=UU...',
3407 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3408 'info_dict': {
3409 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3410 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3411 'title': 'Uploads from Royalty Free Music - Topic',
3412 'uploader': 'Royalty Free Music - Topic',
3413 },
3414 'expected_warnings': [
3415 'A channel/user page was given',
3416 'The URL does not have a videos tab',
3417 ],
3418 'playlist_mincount': 101,
3419 }, {
3420 'note': 'Topic without a UU playlist',
3421 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3422 'info_dict': {
3423 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3424 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3425 },
3426 'expected_warnings': [
3427 'A channel/user page was given',
3428 'The URL does not have a videos tab',
3429 'Falling back to channel URL',
3430 ],
3431 'playlist_mincount': 9,
3432 }, {
3433 'note': 'Youtube music Album',
3434 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3435 'info_dict': {
3436 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3437 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3438 },
3439 'playlist_count': 50,
3440 }, {
3441 'note': 'unlisted single video playlist',
3442 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3443 'info_dict': {
3444 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3445 'uploader': 'colethedj',
3446 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3447 'title': 'yt-dlp unlisted playlist test',
3448 'availability': 'unlisted'
3449 },
3450 'playlist_count': 1,
3451 }]
3452
3453 @classmethod
3454 def suitable(cls, url):
3455 return False if YoutubeIE.suitable(url) else super(
3456 YoutubeTabIE, cls).suitable(url)
3457
3458 def _extract_channel_id(self, webpage):
3459 channel_id = self._html_search_meta(
3460 'channelId', webpage, 'channel id', default=None)
3461 if channel_id:
3462 return channel_id
3463 channel_url = self._html_search_meta(
3464 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3465 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3466 'twitter:app:url:googleplay'), webpage, 'channel url')
3467 return self._search_regex(
3468 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3469 channel_url, 'channel id')
3470
3471 @staticmethod
3472 def _extract_basic_item_renderer(item):
3473 # Modified from _extract_grid_item_renderer
3474 known_basic_renderers = (
3475 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3476 )
3477 for key, renderer in item.items():
3478 if not isinstance(renderer, dict):
3479 continue
3480 elif key in known_basic_renderers:
3481 return renderer
3482 elif key.startswith('grid') and key.endswith('Renderer'):
3483 return renderer
3484
3485 def _grid_entries(self, grid_renderer):
3486 for item in grid_renderer['items']:
3487 if not isinstance(item, dict):
3488 continue
3489 renderer = self._extract_basic_item_renderer(item)
3490 if not isinstance(renderer, dict):
3491 continue
3492 title = self._get_text(renderer.get('title'))
3493
3494 # playlist
3495 playlist_id = renderer.get('playlistId')
3496 if playlist_id:
3497 yield self.url_result(
3498 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3499 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3500 video_title=title)
3501 continue
3502 # video
3503 video_id = renderer.get('videoId')
3504 if video_id:
3505 yield self._extract_video(renderer)
3506 continue
3507 # channel
3508 channel_id = renderer.get('channelId')
3509 if channel_id:
3510 yield self.url_result(
3511 'https://www.youtube.com/channel/%s' % channel_id,
3512 ie=YoutubeTabIE.ie_key(), video_title=title)
3513 continue
3514 # generic endpoint URL support
3515 ep_url = urljoin('https://www.youtube.com/', try_get(
3516 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3517 compat_str))
3518 if ep_url:
3519 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3520 if ie.suitable(ep_url):
3521 yield self.url_result(
3522 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3523 break
3524
3525 def _shelf_entries_from_content(self, shelf_renderer):
3526 content = shelf_renderer.get('content')
3527 if not isinstance(content, dict):
3528 return
3529 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3530 if renderer:
3531 # TODO: add support for nested playlists so each shelf is processed
3532 # as separate playlist
3533 # TODO: this includes only first N items
3534 for entry in self._grid_entries(renderer):
3535 yield entry
3536 renderer = content.get('horizontalListRenderer')
3537 if renderer:
3538 # TODO
3539 pass
3540
3541 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3542 ep = try_get(
3543 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3544 compat_str)
3545 shelf_url = urljoin('https://www.youtube.com', ep)
3546 if shelf_url:
3547 # Skipping links to another channels, note that checking for
3548 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3549 # will not work
3550 if skip_channels and '/channels?' in shelf_url:
3551 return
3552 title = self._get_text(shelf_renderer, lambda x: x['title'])
3553 yield self.url_result(shelf_url, video_title=title)
3554 # Shelf may not contain shelf URL, fallback to extraction from content
3555 for entry in self._shelf_entries_from_content(shelf_renderer):
3556 yield entry
3557
3558 def _playlist_entries(self, video_list_renderer):
3559 for content in video_list_renderer['contents']:
3560 if not isinstance(content, dict):
3561 continue
3562 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3563 if not isinstance(renderer, dict):
3564 continue
3565 video_id = renderer.get('videoId')
3566 if not video_id:
3567 continue
3568 yield self._extract_video(renderer)
3569
3570 def _rich_entries(self, rich_grid_renderer):
3571 renderer = try_get(
3572 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3573 video_id = renderer.get('videoId')
3574 if not video_id:
3575 return
3576 yield self._extract_video(renderer)
3577
3578 def _video_entry(self, video_renderer):
3579 video_id = video_renderer.get('videoId')
3580 if video_id:
3581 return self._extract_video(video_renderer)
3582
3583 def _post_thread_entries(self, post_thread_renderer):
3584 post_renderer = try_get(
3585 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3586 if not post_renderer:
3587 return
3588 # video attachment
3589 video_renderer = try_get(
3590 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3591 video_id = video_renderer.get('videoId')
3592 if video_id:
3593 entry = self._extract_video(video_renderer)
3594 if entry:
3595 yield entry
3596 # playlist attachment
3597 playlist_id = try_get(
3598 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3599 if playlist_id:
3600 yield self.url_result(
3601 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3602 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3603 # inline video links
3604 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3605 for run in runs:
3606 if not isinstance(run, dict):
3607 continue
3608 ep_url = try_get(
3609 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3610 if not ep_url:
3611 continue
3612 if not YoutubeIE.suitable(ep_url):
3613 continue
3614 ep_video_id = YoutubeIE._match_id(ep_url)
3615 if video_id == ep_video_id:
3616 continue
3617 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3618
3619 def _post_thread_continuation_entries(self, post_thread_continuation):
3620 contents = post_thread_continuation.get('contents')
3621 if not isinstance(contents, list):
3622 return
3623 for content in contents:
3624 renderer = content.get('backstagePostThreadRenderer')
3625 if not isinstance(renderer, dict):
3626 continue
3627 for entry in self._post_thread_entries(renderer):
3628 yield entry
3629
3630 r''' # unused
3631 def _rich_grid_entries(self, contents):
3632 for content in contents:
3633 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3634 if video_renderer:
3635 entry = self._video_entry(video_renderer)
3636 if entry:
3637 yield entry
3638 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Generate all entries of the selected tab, following continuations.

        @param tab              selected tab renderer (dict)
        @param item_id          id used when naming the paged API requests
        @param identity_token   forwarded to _generate_api_headers (may be None)
        @param account_syncid   forwarded to _generate_api_headers (may be None)
        @param ytcfg            page ytcfg, forwarded to the API requests (may be None)
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # not an item section; rich items are emitted directly
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # dispatch table: renderer key -> generator of entries
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # single-item list as a mutable cell; Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        # page through continuations until none is found or the API stops responding
        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # remember the visitorData of the response for subsequent request headers
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # responses may instead use onResponseReceived* with continuation items
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # re-wrap the items under the expected key so the
                # corresponding extractor can consume them
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3754
3755 @staticmethod
3756 def _extract_selected_tab(tabs):
3757 for tab in tabs:
3758 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3759 if renderer.get('selected') is True:
3760 return renderer
3761 else:
3762 raise ExtractorError('Unable to find selected tab')
3763
3764 @classmethod
3765 def _extract_uploader(cls, data):
3766 uploader = {}
3767 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3768 owner = try_get(
3769 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3770 if owner:
3771 uploader['uploader'] = owner.get('text')
3772 uploader['uploader_id'] = try_get(
3773 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3774 uploader['uploader_url'] = urljoin(
3775 'https://www.youtube.com/',
3776 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3777 return {k: v for k, v in uploader.items() if v is not None}
3778
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build a playlist result for the selected tab of a channel/playlist page.

        Collects title/description/uploader/thumbnail/availability metadata from
        channelMetadataRenderer or playlistMetadataRenderer (plus the sidebar)
        and wraps the tab's entries generator in a playlist_result.
        """
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            # not a channel page - try playlist metadata instead
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            # for channel pages this is the channel id; None for playlists
            # (filled with item_id below)
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        # normalize thumbnails, dropping entries without a valid URL
        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # hashtag pages carry their title in the header instead
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # playlist pages: pull the uploader from the sidebar instead
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        ytcfg = self._extract_ytcfg(item_id, webpage)
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_identity_token(webpage, item_id),
                self._extract_account_syncid(ytcfg, data), ytcfg),
            **metadata)
3853
    def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
        """Generate entries of a mix playlist, paging via the 'next' API.

        Stops when a page yields no new videos or when the first video of the
        mix comes around again (mixes loop endlessly otherwise).
        """
        first_id = last_id = None
        ytcfg = self._extract_ytcfg(playlist_id, webpage)
        headers = self._generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
        for page_num in itertools.count(1):
            videos = list(self._playlist_entries(playlist))
            if not videos:
                return
            # resume right after the last video yielded from the previous page
            # (pages overlap); -1 + 1 == 0 when last_id is not in this page
            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video['id'] == first_id:
                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                    return
                yield video
            first_id = first_id or videos[0]['id']
            last_id = videos[-1]['id']
            # NOTE(review): assumes the last playlist entry always carries a
            # watchEndpoint; the .get calls below would raise AttributeError if
            # try_get returned None - confirm against live responses
            watch_endpoint = try_get(
                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
            query = {
                'playlistId': playlist_id,
                'videoId': watch_endpoint.get('videoId') or last_id,
                'index': watch_endpoint.get('index') or len(videos),
                'params': watch_endpoint.get('params') or 'OAE%3D'
            }
            response = self._extract_response(
                item_id='%s page %d' % (playlist_id, page_num),
                query=query, ep='next', headers=headers, ytcfg=ytcfg,
                check_get_keys='contents'
            )
            playlist = try_get(
                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3889
3890 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
3891 title = playlist.get('title') or try_get(
3892 data, lambda x: x['titleText']['simpleText'], compat_str)
3893 playlist_id = playlist.get('playlistId') or item_id
3894
3895 # Delegating everything except mix playlists to regular tab-based playlist URL
3896 playlist_url = urljoin(url, try_get(
3897 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3898 compat_str))
3899 if playlist_url and playlist_url != url:
3900 return self.url_result(
3901 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3902 video_title=title)
3903
3904 return self.playlist_result(
3905 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
3906 playlist_id=playlist_id, playlist_title=title)
3907
3908 def _extract_availability(self, data):
3909 """
3910 Gets the availability of a given playlist/tab.
3911 Note: Unless YouTube tells us explicitly, we do not assume it is public
3912 @param data: response
3913 """
3914 is_private = is_unlisted = None
3915 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
3916 badge_labels = self._extract_badges(renderer)
3917
3918 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
3919 privacy_dropdown_entries = try_get(
3920 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
3921 for renderer_dict in privacy_dropdown_entries:
3922 is_selected = try_get(
3923 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
3924 if not is_selected:
3925 continue
3926 label = self._get_text(
3927 try_get(renderer_dict, lambda x: x['privacyDropdownItemRenderer']['label'], dict) or [])
3928 if label:
3929 badge_labels.add(label.lower())
3930 break
3931
3932 for badge_label in badge_labels:
3933 if badge_label == 'unlisted':
3934 is_unlisted = True
3935 elif badge_label == 'private':
3936 is_private = True
3937 elif badge_label == 'public':
3938 is_unlisted = is_private = False
3939 return self._availability(is_private, False, False, False, is_unlisted)
3940
3941 @staticmethod
3942 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
3943 sidebar_renderer = try_get(
3944 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
3945 for item in sidebar_renderer:
3946 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
3947 if renderer:
3948 return renderer
3949
3950 def _reload_with_unavailable_videos(self, item_id, data, webpage):
3951 """
3952 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
3953 """
3954 browse_id = params = None
3955 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
3956 if not renderer:
3957 return
3958 menu_renderer = try_get(
3959 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
3960 for menu_item in menu_renderer:
3961 if not isinstance(menu_item, dict):
3962 continue
3963 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
3964 text = try_get(
3965 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
3966 if not text or text.lower() != 'show unavailable videos':
3967 continue
3968 browse_endpoint = try_get(
3969 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
3970 browse_id = browse_endpoint.get('browseId')
3971 params = browse_endpoint.get('params')
3972 break
3973
3974 ytcfg = self._extract_ytcfg(item_id, webpage)
3975 headers = self._generate_api_headers(
3976 ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3977 identity_token=self._extract_identity_token(webpage, item_id=item_id),
3978 visitor_data=try_get(
3979 self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
3980 query = {
3981 'params': params or 'wgYCCAA=',
3982 'browseId': browse_id or 'VL%s' % item_id
3983 }
3984 return self._extract_response(
3985 item_id=item_id, headers=headers, query=query,
3986 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
3987 note='Downloading API JSON with unavailable videos')
3988
3989 def _extract_webpage(self, url, item_id):
3990 retries = self.get_param('extractor_retries', 3)
3991 count = -1
3992 last_error = 'Incomplete yt initial data recieved'
3993 while count < retries:
3994 count += 1
3995 # Sometimes youtube returns a webpage with incomplete ytInitialData
3996 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3997 if count:
3998 self.report_warning('%s. Retrying ...' % last_error)
3999 webpage = self._download_webpage(
4000 url, item_id,
4001 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4002 data = self._extract_yt_initial_data(item_id, webpage)
4003 if data.get('contents') or data.get('currentVideoEndpoint'):
4004 break
4005 # Extract alerts here only when there is error
4006 self._extract_and_report_alerts(data)
4007 if count >= retries:
4008 raise ExtractorError(last_error)
4009 return webpage, data
4010
4011 @staticmethod
4012 def _smuggle_data(entries, data):
4013 for entry in entries:
4014 if data:
4015 entry['url'] = smuggle_url(entry['url'], data)
4016 yield entry
4017
4018 def _real_extract(self, url):
4019 url, smuggled_data = unsmuggle_url(url, {})
4020 if self.is_music_url(url):
4021 smuggled_data['is_music_url'] = True
4022 info_dict = self.__real_extract(url, smuggled_data)
4023 if info_dict.get('entries'):
4024 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4025 return info_dict
4026
    # Splits a URL into pre (everything _VALID_URL matches), an optional tab
    # segment, and the trailing post part; the (?(channel_type)...) conditional
    # only allows the tab group when the 'channel_type' group matched
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4028
    def __real_extract(self, url, smuggled_data):
        """Core tab/playlist extraction: normalize the URL, apply redirects and
        compat options, then dispatch to tabs / playlist / single-video handling.
        """
        item_id = self._match_id(url)
        # force canonical host so the markup is predictable
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # groupdict of _url_re with None values replaced by ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # re-assemble and re-parse the possibly rewritten URL
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        # no tabs - maybe a watch-page playlist (mix)
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        # last resort: a bare video endpoint
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4143
4144
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Defer to YoutubeTabIE for tab URLs and to the watch extractor
        for playlist URLs that also carry a video id (?v=...)."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Normalize any matched playlist reference (bare id, embed, etc.)
        into a canonical /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url)
        if not query:
            # Bare playlist id was given - synthesize the query string
            query = {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            # Preserve the music.youtube.com origin for downstream handling
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4227
4228
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite a youtu.be share link carrying a list= parameter into the
        canonical watch URL so YoutubeTabIE keeps the playlist context."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        playlist_id = mobj.group('playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4267
4268
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Map "ytuser:NAME" onto the corresponding /user/ channel page."""
        user_id = self._match_id(url)
        channel_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            channel_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4282
4283
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos are exposed as the auto-generated "LL" playlist.
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4301
4302
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional protobuf search filter; subclasses set this to change ordering
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* video results for *query*, following search
        continuations page by page via the innertube 'search' endpoint."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        # Continuation request parameters; empty dict means "first page"
        continuation = {}
        for page_num in itertools.count(1):
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page nests results differently from continuation pages,
            # so try both layouts
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    # Keep the first continuation token found among the entries
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    # Skip anything that is not a plain video renderer
                    # (ads, channel renderers, shelf items, ...)
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token on this page means we reached the end
            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
4370
4371
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # URL-encoded search filter blob ('CAI=') that sorts results by upload
    # date, newest first (see IE_DESC)
    _SEARCH_PARAMS = 'CAI%3D'
4377
4378
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        return cls._VALID_URL

    def _real_extract(self, url):
        """Pull the search query (and optional 'sp' filter blob) out of a
        /results URL and run it as a regular ytsearch query."""
        params = parse_qs(url)
        query = (params.get('search_query') or params.get('q'))[0]
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4404
4405
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the extractor name from the concrete feed, e.g. 'youtube:history'
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # Every feed lives under /feed/<name>; delegate to the tab extractor
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4422
4423
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch-later is the auto-generated "WL" playlist.
        watchlater_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watchlater_url, ie=YoutubeTabIE.ie_key())
4436
4437
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches the bare youtube.com homepage, which shows recommendations
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Unlike the other feeds, recommendations are served without login
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4453
4454
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Redirects to https://www.youtube.com/feed/subscriptions (see base class)
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4466
4467
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    # Redirects to https://www.youtube.com/feed/history (see base class)
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4476
4477
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
        attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: a watch URL without ?v= usually means the user's
        shell ate everything after an unquoted '&'.

        Fix: the hint previously told the user to run 'youtube-dl', but this
        is the yt-dlp fork, so name the correct executable.
        """
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
4525
4526
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: the matched id is shorter than the 11 characters a
        valid YouTube video id requires, so the URL was probably cut off."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)