]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/extractor/youtube.py
[npr] Make SMIL extraction non-fatal (#2099)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5import calendar
6import copy
7import datetime
8import functools
9import hashlib
10import itertools
11import json
12import math
13import os.path
14import random
15import re
16import sys
17import time
18import traceback
19import threading
20
21from .common import InfoExtractor, SearchInfoExtractor
22from ..compat import (
23 compat_chr,
24 compat_HTTPError,
25 compat_parse_qs,
26 compat_str,
27 compat_urllib_parse_unquote_plus,
28 compat_urllib_parse_urlencode,
29 compat_urllib_parse_urlparse,
30 compat_urlparse,
31)
32from ..jsinterp import JSInterpreter
33from ..utils import (
34 bug_reports_message,
35 clean_html,
36 datetime_from_str,
37 dict_get,
38 error_to_compat_str,
39 ExtractorError,
40 float_or_none,
41 format_field,
42 int_or_none,
43 is_html,
44 join_nonempty,
45 mimetype2ext,
46 network_exceptions,
47 NO_DEFAULT,
48 orderedSet,
49 parse_codecs,
50 parse_count,
51 parse_duration,
52 parse_iso8601,
53 parse_qs,
54 qualities,
55 remove_end,
56 remove_start,
57 smuggle_url,
58 str_or_none,
59 str_to_int,
60 strftime_or_none,
61 traverse_obj,
62 try_get,
63 unescapeHTML,
64 unified_strdate,
65 unsmuggle_url,
66 update_url_query,
67 url_or_none,
68 urljoin,
69 variadic,
70)
71
72
def get_first(obj, keys, **kwargs):
    """Look up `keys` under every branch of `obj` and return a single result.

    Thin wrapper around `traverse_obj`: the path is prefixed with `...` and
    `get_all=False` is forced, so one value is returned rather than a list.
    Extra keyword arguments are forwarded to `traverse_obj` unchanged.
    """
    branch_path = (..., *variadic(keys))
    return traverse_obj(obj, branch_path, **kwargs, get_all=False)
75
76
77# any clients starting with _ cannot be explicitly requested by the user
# Per-client Innertube configuration, keyed by the internal client name.
# Entries that omit INNERTUBE_API_KEY, INNERTUBE_HOST or REQUIRE_JS_PLAYER
# receive defaults from build_innertube_clients().
INNERTUBE_CLIENTS = {
    # --- web clients ---
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },

    # --- android clients ---
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },

    # --- ios clients ---
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },

    # --- mobile web client ---
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
}
223
224
def build_innertube_clients():
    """Fill in defaults and derived entries of INNERTUBE_CLIENTS in place.

    For every client already present:
      * missing INNERTUBE_API_KEY / INNERTUBE_HOST / REQUIRE_JS_PLAYER get defaults,
      * the context client language defaults to 'en',
      * 'priority' is set to ten times the value `qualities` gives the client's
        base name (the part before the first underscore), then adjusted below.

    Base clients additionally get a '<client>_agegate' copy (priority - 1) that
    uses the EMBED client screen and a thirdParty embed URL; '*_embedded'
    clients get the thirdParty context (priority - 2); all others get priority - 3.
    """
    base_clients = ('android', 'web', 'ios', 'mweb')
    rank_of = qualities(base_clients[::-1])
    embed_context = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    fallback_settings = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: agegate entries are inserted while looping
    for name, config in tuple(INNERTUBE_CLIENTS.items()):
        for setting, fallback in fallback_settings:
            config.setdefault(setting, fallback)
        context = config['INNERTUBE_CONTEXT']
        context['client'].setdefault('hl', 'en')
        base_name = name.split('_', 1)[0]
        config['priority'] = 10 * rank_of(base_name)

        if name in base_clients:
            agegate_config = copy.deepcopy(config)
            agegate_config['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_config['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            agegate_config['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_config
        elif name.endswith('_embedded'):
            context['thirdParty'] = embed_context
            config['priority'] -= 2
        else:
            config['priority'] -= 3


build_innertube_clients()
252
253
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Path components that cannot be channel/user names (regex alternation)
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Hostname regexes of Invidious (and similar) front-ends
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    )

    def _login(self):
        """
        Attempt to log in to YouTube.
        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """

        if (self._LOGIN_REQUIRED
                and self.get_param('cookiefile') is None
                and self.get_param('cookiesfrombrowser') is None):
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')
        # Only the username matters here: credentials are not used to log in,
        # their presence merely triggers a warning pointing to cookies
        username, _ = self._get_login_info()
        if username:
            self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')

    def _initialize_consent(self):
        """Set a 'YES' CONSENT cookie unless a session or consent cookie already exists."""
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            # Reuse the id of a pending consent cookie when there is one
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _initialize_pref(self):
        """Force 'hl=en' in the PREF cookie, keeping its other values when parsable."""
        cookies = self._get_cookies('https://www.youtube.com/')
        pref_cookie = cookies.get('PREF')
        pref = {}
        if pref_cookie:
            try:
                pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
            except ValueError:
                self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
        pref.update({'hl': 'en'})
        self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))

    def _real_initialize(self):
        """Prepare cookies (PREF, CONSENT) and run the login check."""
        self._initialize_pref()
        self._initialize_consent()
        self._login()

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _get_default_ytcfg(self, client='web'):
        """Return a deep copy of the built-in config of `client`."""
        return copy.deepcopy(INNERTUBE_CLIENTS[client])

    def _get_innertube_host(self, client='web'):
        """Return the INNERTUBE_HOST configured for `client`."""
        return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

    def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
        """try_get on `ytcfg`, falling back to the default config of `default_client`.

        The fallback is used whenever the value from `ytcfg` is falsy.
        """
        # try_get but with fallback to default ytcfg client values when present
        _func = lambda y: try_get(y, getter, expected_type)
        return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

    def _extract_client_name(self, ytcfg, default_client='web'):
        """Return the client name from `ytcfg` (or the default client config)."""
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

    def _extract_client_version(self, ytcfg, default_client='web'):
        """Return the client version from `ytcfg` (or the default client config)."""
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)

    def _extract_api_key(self, ytcfg=None, default_client='web'):
        """Return the Innertube API key from `ytcfg` (or the default client config)."""
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)

    def _extract_context(self, ytcfg=None, default_client='web'):
        """Return the INNERTUBE_CONTEXT from `ytcfg` or the default config, with hl='en'.

        NOTE(review): when the context is taken from `ytcfg`, it looks like the
        caller's own dict is modified in place - confirm callers do not mind.
        """
        context = get_first(
            (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
        # Enforce language for extraction
        traverse_obj(context, 'client', expected_type=dict, default={})['hl'] = 'en'
        return context

    # Cached SAPISID cookie value: None = not looked up yet, False = not available
    _SAPISID = None

    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """Build the 'SAPISIDHASH ...' Authorization value for `origin`.

        Returns None when no SAPISID (or __Secure-3PAPISID) cookie value is available.
        """
        time_now = round(time.time())
        if self._SAPISID is None:
            yt_cookies = self._get_cookies('https://www.youtube.com')
            # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
            # See: https://github.com/yt-dlp/yt-dlp/issues/393
            sapisid_cookie = dict_get(
                yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
            if sapisid_cookie and sapisid_cookie.value:
                self._SAPISID = sapisid_cookie.value
                self.write_debug('Extracted SAPISID cookie')
                # SAPISID cookie is required if not already present
                if not yt_cookies.get('SAPISID'):
                    self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                    self._set_cookie(
                        '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
            else:
                self._SAPISID = False
        if not self._SAPISID:
            return None
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None, api_hostname=None, default_client='web'):
        """POST `query` (merged with the context) to the /youtubei/v1/`ep` endpoint.

        `context`, `api_key` and `api_hostname` fall back to values derived from
        the default configs. Returns whatever `_download_json` returns.
        """
        data = {'context': context or self._extract_context(default_client=default_client)}
        data.update(query)
        real_headers = self.generate_api_headers(default_client=default_client)
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        # NOTE(review): the API key fallback below does not receive default_client,
        # so it always resolves against the 'web' client config - confirm intended
        return self._download_json(
            'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def extract_yt_initial_data(self, item_id, webpage, fatal=True):
        """Find and parse the ytInitialData JSON object embedded in `webpage`."""
        data = self._search_regex(
            (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
             self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
        if data:
            return self._parse_json(data, item_id, fatal=fatal)

    @staticmethod
    def _extract_session_index(*data):
        """
        Index of current account in account list.
        See: https://github.com/yt-dlp/yt-dlp/pull/519
        """
        for ytcfg in data:
            session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
            if session_index is not None:
                return session_index

    # Deprecated?
    def _extract_identity_token(self, ytcfg=None, webpage=None):
        """Return ID_TOKEN from `ytcfg`, else search `webpage` for it; None if absent."""
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        if webpage:
            return self._search_regex(
                r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
                'identity token', default=None, fatal=False)

    @staticmethod
    def _extract_account_syncid(*args):
        """
        Extract syncId required to download private playlists of secondary channels
        @params response and/or ytcfg
        """
        for data in args:
            # ytcfg includes channel_syncid if on secondary channel
            delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
            if delegated_sid:
                return delegated_sid
            sync_ids = (try_get(
                data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                       lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
            if len(sync_ids) >= 2 and sync_ids[1]:
                # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
                # and just "user_syncid||" for primary channel. We only want the channel_syncid
                return sync_ids[0]

    @staticmethod
    def _extract_visitor_data(*args):
        """
        Extracts visitorData from an API response or ytcfg
        Appears to be used to track session state
        """
        return get_first(
            args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
            expected_type=str)

    @property
    def is_authenticated(self):
        """True when a SAPISIDHASH header can be generated from the cookies."""
        return bool(self._generate_sapisidhash_header())

    def extract_ytcfg(self, video_id, webpage):
        """Parse the `ytcfg.set({...})` object from `webpage`; {} when missing or invalid."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def generate_api_headers(
            self, *, ytcfg=None, account_syncid=None, session_index=None,
            visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
        """Build the HTTP headers for an Innertube API request.

        Explicit keyword arguments take precedence over values extracted from
        `ytcfg`. Headers whose value is None are omitted from the result.
        """
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
            'Origin': origin,
            'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
            'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
            'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
        }
        if session_index is None:
            session_index = self._extract_session_index(ytcfg)
        if account_syncid or session_index is not None:
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0

        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return {h: v for h, v in headers.items() if v is not None}

    @staticmethod
    def _build_api_continuation_query(continuation, ctp=None):
        """Return the query dict for a continuation token, with click tracking if `ctp` is set."""
        query = {
            'continuation': continuation
        }
        # TODO: Inconsistency with clickTrackingParams.
        # Currently we have a fixed ctp contained within context (from ytcfg)
        # and a ctp in root query for continuation.
        if ctp:
            query['clickTracking'] = {'clickTrackingParams': ctp}
        return query

    @classmethod
    def _extract_next_continuation_data(cls, renderer):
        """Build a continuation query from next/reload continuation data in `renderer`, if any."""
        next_continuation = try_get(
            renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                       lambda x: x['continuation']['reloadContinuationData']), dict)
        if not next_continuation:
            return
        continuation = next_continuation.get('continuation')
        if not continuation:
            return
        ctp = next_continuation.get('clickTrackingParams')
        return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation_ep_data(cls, continuation_ep: dict):
        """Build a continuation query from a continuation endpoint dict; None if unusable."""
        if isinstance(continuation_ep, dict):
            continuation = try_get(
                continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
            if not continuation:
                return
            ctp = continuation_ep.get('clickTrackingParams')
            return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation(cls, renderer):
        """Return the first continuation query found in `renderer`, or None.

        Next/reload continuation data is tried first, then continuationItemRenderer
        entries inside the 'contents' and 'items' lists.
        """
        next_continuation = cls._extract_next_continuation_data(renderer)
        if next_continuation:
            return next_continuation

        contents = []
        for key in ('contents', 'items'):
            contents.extend(try_get(renderer, lambda x: x[key], list) or [])

        for content in contents:
            if not isinstance(content, dict):
                continue
            continuation_ep = try_get(
                content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                          lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
                dict)
            continuation = cls._extract_continuation_ep_data(continuation_ep)
            if continuation:
                return continuation

    @classmethod
    def _extract_alerts(cls, data):
        """Yield (alert_type, message) for every alert in data['alerts'] that has both."""
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # Skip malformed entries instead of failing on `.get`
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = cls._get_text(alert, 'text')
                if message:
                    yield alert_type, message

    def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
        """Report `alerts` as warnings; raise ExtractorError for the last error if `fatal`.

        Alerts of type 'error' are only treated as errors when `fatal` is set;
        all but the last error are downgraded to warnings.
        """
        errors = []
        warnings = []
        for alert_type, alert_message in alerts:
            if alert_type.lower() == 'error' and fatal:
                errors.append([alert_type, alert_message])
            else:
                warnings.append([alert_type, alert_message])

        for alert_type, alert_message in (warnings + errors[:-1]):
            self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
        if errors:
            raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

    def _extract_and_report_alerts(self, data, *args, **kwargs):
        """Extract the alerts of `data` and pass them to `_report_alerts`."""
        return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

    def _extract_badges(self, renderer: dict):
        """Return the set of lowercased metadata badge labels of `renderer`."""
        badges = set()
        for badge in try_get(renderer, lambda x: x['badges'], list) or []:
            label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
            if label:
                badges.add(label.lower())
        return badges

    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """Return the first non-empty text found at any of the paths in `data`.

        For each candidate, 'simpleText' is preferred; otherwise the 'text' of
        its 'runs' (at most `max_runs` of them, if given) are joined.
        With no paths, `data` itself is the only candidate. Returns None if
        nothing is found.
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # A path without branching yields a single object, not a list of them
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                if not runs and isinstance(item, list):
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text

    @staticmethod
    def extract_relative_time(relative_time_text):
        """
        Extracts a relative time from string and converts to dt object
        e.g. 'streamed 6 days ago', '5 seconds ago (edited)'
        """
        mobj = re.search(r'(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
        if mobj:
            try:
                return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')), precision='auto')
            except ValueError:
                return None

    def _extract_time_text(self, renderer, *path_list):
        """Return (timestamp, text) for the relative time text found in `renderer`.

        `timestamp` is None when the text is empty or cannot be parsed; a warning
        is issued (once) for non-empty text that cannot be parsed.
        """
        text = self._get_text(renderer, *path_list) or ''
        dt = self.extract_relative_time(text)
        timestamp = None
        if isinstance(dt, datetime.datetime):
            timestamp = calendar.timegm(dt.timetuple())
        if text and timestamp is None:
            self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
        return timestamp, text

    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """Call the API with retries and return the response.

        Retries (up to the 'extractor_retries' param, default 3) on network
        errors other than HTTP 403/429, on "unknown error" alerts, and when none
        of `check_get_keys` is present in the response. Once retries are
        exhausted, or on a non-retryable error, the error is raised if `fatal`,
        otherwise a warning is reported and None is returned.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                        # Non-HTML error body: surface the API's own error message
                        e.cause.seek(0)
                        yt_error = try_get(
                            self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e.msg)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                try:
                    self._extract_and_report_alerts(response, only_once=True)
                except ExtractorError as e:
                    # YouTube servers may return errors we want to retry on in a 200 OK response
                    # See: https://github.com/yt-dlp/yt-dlp/issues/839
                    # Only retry while attempts remain; otherwise the loop would end
                    # and the erroneous response would be returned as if it were valid
                    if 'unknown error' in e.msg.lower() and count < retries:
                        last_error = e.msg
                        continue
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response

    @staticmethod
    def is_music_url(url):
        """Return True if `url` is an http(s) URL on music.youtube.com."""
        return re.match(r'https?://music\.youtube\.com/', url) is not None

    def _extract_video(self, renderer):
        """Build a 'url'-type result dict for YoutubeIE from a video renderer."""
        video_id = renderer.get('videoId')
        title = self._get_text(renderer, 'title')
        description = self._get_text(renderer, 'descriptionSnippet')
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
        view_count_text = self._get_text(renderer, 'viewCountText') or ''
        # Leading digits (with thousands separators) after stripping whitespace
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))

        uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
        channel_id = traverse_obj(
            renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False)
        timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
        scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
        overlay_style = traverse_obj(
            renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
        badges = self._extract_badges(renderer)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
            'channel_id': channel_id,
            'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
            'live_status': ('is_upcoming' if scheduled_timestamp is not None
                            else 'was_live' if 'streamed' in time_text.lower()
                            else 'is_live' if overlay_style == 'LIVE' or 'live now' in badges
                            else None),
            'release_timestamp': scheduled_timestamp,
            'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
        }
805
806
807class YoutubeIE(YoutubeBaseInfoExtractor):
808 IE_DESC = 'YouTube'
809 _VALID_URL = r"""(?x)^
810 (
811 (?:https?://|//) # http(s):// or protocol-independent URL
812 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
813 (?:www\.)?deturl\.com/www\.youtube\.com|
814 (?:www\.)?pwnyoutube\.com|
815 (?:www\.)?hooktube\.com|
816 (?:www\.)?yourepeat\.com|
817 tube\.majestyc\.net|
818 %(invidious)s|
819 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
820 (?:.*?\#/)? # handle anchor (#/) redirect urls
821 (?: # the various things that can precede the ID:
822 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
823 |(?: # or the v= param in all its forms
824 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
825 (?:\?|\#!?) # the params delimiter ? or # or #!
826 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
827 v=
828 )
829 ))
830 |(?:
831 youtu\.be| # just youtu.be/xxxx
832 vid\.plus| # or vid.plus/xxxx
833 zwearz\.com/watch| # or zwearz.com/watch/xxxx
834 %(invidious)s
835 )/
836 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
837 )
838 )? # all until now is optional -> you can pass the naked ID
839 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
840 (?(1).+)? # if we found the ID, everything can follow
841 (?:\#|$)""" % {
842 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
843 }
844 _PLAYER_INFO_RE = (
845 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
846 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
847 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
848 )
# Static table of known format properties, keyed by itag (as a string; plus the
# special '_rtmp' key). Each value is a dict that may carry 'ext', 'width',
# 'height', 'acodec', 'vcodec', 'abr', 'fps', 'format_note', 'container',
# 'protocol' and 'preference'.
# 3D entries use preference -20 and HLS entries use preference -10; all other
# entries leave 'preference' unset.
# NOTE(review): presumably used to fill in metadata for formats by itag -- the
# consumer is outside this chunk, confirm there.
849 _formats = {
850 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
851 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
852 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
853 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
854 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
855 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
856 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
857 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
858 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
859 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
860 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
861 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
862 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
863 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
864 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
865 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
866 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
867 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
868
869
870 # 3D videos
871 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
872 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
873 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
874 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
875 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
876 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
877 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
878
879 # Apple HTTP Live Streaming
880 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
881 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
882 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
883 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
884 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
885 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
886 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
887 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
888
889 # DASH mp4 video
890 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
891 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
892 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
893 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
894 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
895 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
896 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
897 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
898 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
899 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
900 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
901 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
902
903 # DASH mp4 audio
904 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
905 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
906 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
907 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
908 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
909 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
910 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
911
912 # DASH webm
913 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
914 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
915 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
916 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
917 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
918 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
919 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
920 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
921 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
922 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
923 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
924 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
925 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
926 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
927 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
928 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
929 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
930 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
931 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
932 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
933 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
934 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
935
936 # DASH webm audio
937 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
938 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
939
940 # DASH webm audio with opus inside
941 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
942 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
943 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
944
945 # RTMP (unnamed)
946 '_rtmp': {'protocol': 'rtmp'},
947
948 # av01 video only formats sometimes served with "unknown" codecs
949 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
950 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
951 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
952 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
953 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
954 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
955 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
956 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
957 }
# Subtitle format identifiers.
# NOTE(review): the name suggests these are the caption formats offered per
# subtitle track -- the consumer is outside this chunk, confirm there.
958 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
959
# NOTE(review): presumably turns off the base extractor's geo-bypass handling
# for this extractor -- confirm against InfoExtractor in .common.
960 _GEO_BYPASS = False
961
# Name of this extractor.
962 IE_NAME = 'youtube'
963 _TESTS = [
964 {
965 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
966 'info_dict': {
967 'id': 'BaW_jenozKc',
968 'ext': 'mp4',
969 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
970 'uploader': 'Philipp Hagemeister',
971 'uploader_id': 'phihag',
972 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
973 'channel': 'Philipp Hagemeister',
974 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
975 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
976 'upload_date': '20121002',
977 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
978 'categories': ['Science & Technology'],
979 'tags': ['youtube-dl'],
980 'duration': 10,
981 'view_count': int,
982 'like_count': int,
983 # 'dislike_count': int,
984 'availability': 'public',
985 'playable_in_embed': True,
986 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
987 'live_status': 'not_live',
988 'age_limit': 0,
989 'start_time': 1,
990 'end_time': 9,
991 }
992 },
993 {
994 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
995 'note': 'Embed-only video (#1746)',
996 'info_dict': {
997 'id': 'yZIXLfi8CZQ',
998 'ext': 'mp4',
999 'upload_date': '20120608',
1000 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1001 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1002 'uploader': 'SET India',
1003 'uploader_id': 'setindia',
1004 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1005 'age_limit': 18,
1006 },
1007 'skip': 'Private video',
1008 },
1009 {
1010 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1011 'note': 'Use the first video ID in the URL',
1012 'info_dict': {
1013 'id': 'BaW_jenozKc',
1014 'ext': 'mp4',
1015 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1016 'uploader': 'Philipp Hagemeister',
1017 'uploader_id': 'phihag',
1018 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1019 'upload_date': '20121002',
1020 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1021 'categories': ['Science & Technology'],
1022 'tags': ['youtube-dl'],
1023 'duration': 10,
1024 'view_count': int,
1025 'like_count': int,
1026 'dislike_count': int,
1027 },
1028 'params': {
1029 'skip_download': True,
1030 },
1031 },
1032 {
1033 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1034 'note': '256k DASH audio (format 141) via DASH manifest',
1035 'info_dict': {
1036 'id': 'a9LDPn-MO4I',
1037 'ext': 'm4a',
1038 'upload_date': '20121002',
1039 'uploader_id': '8KVIDEO',
1040 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1041 'description': '',
1042 'uploader': '8KVIDEO',
1043 'title': 'UHDTV TEST 8K VIDEO.mp4'
1044 },
1045 'params': {
1046 'youtube_include_dash_manifest': True,
1047 'format': '141',
1048 },
1049 'skip': 'format 141 not served anymore',
1050 },
1051 # DASH manifest with encrypted signature
1052 {
1053 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1054 'info_dict': {
1055 'id': 'IB3lcPjvWLA',
1056 'ext': 'm4a',
1057 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1058 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1059 'duration': 244,
1060 'uploader': 'AfrojackVEVO',
1061 'uploader_id': 'AfrojackVEVO',
1062 'upload_date': '20131011',
1063 'abr': 129.495,
1064 },
1065 'params': {
1066 'youtube_include_dash_manifest': True,
1067 'format': '141/bestaudio[ext=m4a]',
1068 },
1069 },
1070 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1071 {
1072 'note': 'Embed allowed age-gate video',
1073 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1074 'info_dict': {
1075 'id': 'HtVdAasjOgU',
1076 'ext': 'mp4',
1077 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1078 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1079 'duration': 142,
1080 'uploader': 'The Witcher',
1081 'uploader_id': 'WitcherGame',
1082 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1083 'upload_date': '20140605',
1084 'age_limit': 18,
1085 },
1086 },
1087 {
1088 'note': 'Age-gate video with embed allowed in public site',
1089 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1090 'info_dict': {
1091 'id': 'HsUATh_Nc2U',
1092 'ext': 'mp4',
1093 'title': 'Godzilla 2 (Official Video)',
1094 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1095 'upload_date': '20200408',
1096 'uploader_id': 'FlyingKitty900',
1097 'uploader': 'FlyingKitty',
1098 'age_limit': 18,
1099 },
1100 },
1101 {
1102 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1103 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1104 'info_dict': {
1105 'id': 'Tq92D6wQ1mg',
1106 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1107 'ext': 'mp4',
1108 'upload_date': '20191227',
1109 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1110 'uploader': 'Projekt Melody',
1111 'description': 'md5:17eccca93a786d51bc67646756894066',
1112 'age_limit': 18,
1113 },
1114 },
1115 {
1116 'note': 'Non-Agegated non-embeddable video',
1117 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1118 'info_dict': {
1119 'id': 'MeJVWBSsPAY',
1120 'ext': 'mp4',
1121 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1122 'uploader': 'Herr Lurik',
1123 'uploader_id': 'st3in234',
1124 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1125 'upload_date': '20130730',
1126 },
1127 },
1128 {
1129 'note': 'Non-bypassable age-gated video',
1130 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1131 'only_matching': True,
1132 },
1133 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1134 # YouTube Red ad is not captured for creator
1135 {
1136 'url': '__2ABJjxzNo',
1137 'info_dict': {
1138 'id': '__2ABJjxzNo',
1139 'ext': 'mp4',
1140 'duration': 266,
1141 'upload_date': '20100430',
1142 'uploader_id': 'deadmau5',
1143 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1144 'creator': 'deadmau5',
1145 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1146 'uploader': 'deadmau5',
1147 'title': 'Deadmau5 - Some Chords (HD)',
1148 'alt_title': 'Some Chords',
1149 },
1150 'expected_warnings': [
1151 'DASH manifest missing',
1152 ]
1153 },
1154 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1155 {
1156 'url': 'lqQg6PlCWgI',
1157 'info_dict': {
1158 'id': 'lqQg6PlCWgI',
1159 'ext': 'mp4',
1160 'duration': 6085,
1161 'upload_date': '20150827',
1162 'uploader_id': 'olympic',
1163 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1164 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1165 'uploader': 'Olympics',
1166 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1167 },
1168 'params': {
1169 'skip_download': 'requires avconv',
1170 }
1171 },
1172 # Non-square pixels
1173 {
1174 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1175 'info_dict': {
1176 'id': '_b-2C3KPAM0',
1177 'ext': 'mp4',
1178 'stretched_ratio': 16 / 9.,
1179 'duration': 85,
1180 'upload_date': '20110310',
1181 'uploader_id': 'AllenMeow',
1182 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1183 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1184 'uploader': '孫ᄋᄅ',
1185 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1186 },
1187 },
1188 # url_encoded_fmt_stream_map is empty string
1189 {
1190 'url': 'qEJwOuvDf7I',
1191 'info_dict': {
1192 'id': 'qEJwOuvDf7I',
1193 'ext': 'webm',
1194 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1195 'description': '',
1196 'upload_date': '20150404',
1197 'uploader_id': 'spbelect',
1198 'uploader': 'Наблюдатели Петербурга',
1199 },
1200 'params': {
1201 'skip_download': 'requires avconv',
1202 },
1203 'skip': 'This live event has ended.',
1204 },
1205 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1206 {
1207 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1208 'info_dict': {
1209 'id': 'FIl7x6_3R5Y',
1210 'ext': 'webm',
1211 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1212 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1213 'duration': 220,
1214 'upload_date': '20150625',
1215 'uploader_id': 'dorappi2000',
1216 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1217 'uploader': 'dorappi2000',
1218 'formats': 'mincount:31',
1219 },
1220 'skip': 'not actual anymore',
1221 },
1222 # DASH manifest with segment_list
1223 {
1224 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1225 'md5': '8ce563a1d667b599d21064e982ab9e31',
1226 'info_dict': {
1227 'id': 'CsmdDsKjzN8',
1228 'ext': 'mp4',
1229 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1230 'uploader': 'Airtek',
1231 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1232 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1233 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1234 },
1235 'params': {
1236 'youtube_include_dash_manifest': True,
1237 'format': '135', # bestvideo
1238 },
1239 'skip': 'This live event has ended.',
1240 },
1241 {
1242 # Multifeed videos (multiple cameras), URL is for Main Camera
1243 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1244 'info_dict': {
1245 'id': 'jvGDaLqkpTg',
1246 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1247 'description': 'md5:e03b909557865076822aa169218d6a5d',
1248 },
1249 'playlist': [{
1250 'info_dict': {
1251 'id': 'jvGDaLqkpTg',
1252 'ext': 'mp4',
1253 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1254 'description': 'md5:e03b909557865076822aa169218d6a5d',
1255 'duration': 10643,
1256 'upload_date': '20161111',
1257 'uploader': 'Team PGP',
1258 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1259 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1260 },
1261 }, {
1262 'info_dict': {
1263 'id': '3AKt1R1aDnw',
1264 'ext': 'mp4',
1265 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1266 'description': 'md5:e03b909557865076822aa169218d6a5d',
1267 'duration': 10991,
1268 'upload_date': '20161111',
1269 'uploader': 'Team PGP',
1270 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1271 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1272 },
1273 }, {
1274 'info_dict': {
1275 'id': 'RtAMM00gpVc',
1276 'ext': 'mp4',
1277 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1278 'description': 'md5:e03b909557865076822aa169218d6a5d',
1279 'duration': 10995,
1280 'upload_date': '20161111',
1281 'uploader': 'Team PGP',
1282 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1283 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1284 },
1285 }, {
1286 'info_dict': {
1287 'id': '6N2fdlP3C5U',
1288 'ext': 'mp4',
1289 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1290 'description': 'md5:e03b909557865076822aa169218d6a5d',
1291 'duration': 10990,
1292 'upload_date': '20161111',
1293 'uploader': 'Team PGP',
1294 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1295 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1296 },
1297 }],
1298 'params': {
1299 'skip_download': True,
1300 },
1301 'skip': 'Not multifeed anymore',
1302 },
1303 {
1304 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1305 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1306 'info_dict': {
1307 'id': 'gVfLd0zydlo',
1308 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1309 },
1310 'playlist_count': 2,
1311 'skip': 'Not multifeed anymore',
1312 },
1313 {
1314 'url': 'https://vid.plus/FlRa-iH7PGw',
1315 'only_matching': True,
1316 },
1317 {
1318 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1319 'only_matching': True,
1320 },
1321 {
1322 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1323 # Also tests cut-off URL expansion in video description (see
1324 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1325 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1326 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1327 'info_dict': {
1328 'id': 'lsguqyKfVQg',
1329 'ext': 'mp4',
1330 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1331 'alt_title': 'Dark Walk',
1332 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1333 'duration': 133,
1334 'upload_date': '20151119',
1335 'uploader_id': 'IronSoulElf',
1336 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1337 'uploader': 'IronSoulElf',
1338 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1339 'track': 'Dark Walk',
1340 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1341 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1342 },
1343 'params': {
1344 'skip_download': True,
1345 },
1346 },
1347 {
1348 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1349 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1350 'only_matching': True,
1351 },
1352 {
1353 # Video with yt:stretch=17:0
1354 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1355 'info_dict': {
1356 'id': 'Q39EVAstoRM',
1357 'ext': 'mp4',
1358 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1359 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1360 'upload_date': '20151107',
1361 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1362 'uploader': 'CH GAMER DROID',
1363 },
1364 'params': {
1365 'skip_download': True,
1366 },
1367 'skip': 'This video does not exist.',
1368 },
1369 {
1370 # Video with incomplete 'yt:stretch=16:'
1371 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1372 'only_matching': True,
1373 },
1374 {
1375 # Video licensed under Creative Commons
1376 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1377 'info_dict': {
1378 'id': 'M4gD1WSo5mA',
1379 'ext': 'mp4',
1380 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1381 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1382 'duration': 721,
1383 'upload_date': '20150127',
1384 'uploader_id': 'BerkmanCenter',
1385 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1386 'uploader': 'The Berkman Klein Center for Internet & Society',
1387 'license': 'Creative Commons Attribution license (reuse allowed)',
1388 },
1389 'params': {
1390 'skip_download': True,
1391 },
1392 },
1393 {
1394 # Channel-like uploader_url
1395 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1396 'info_dict': {
1397 'id': 'eQcmzGIKrzg',
1398 'ext': 'mp4',
1399 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1400 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1401 'duration': 4060,
1402 'upload_date': '20151119',
1403 'uploader': 'Bernie Sanders',
1404 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1405 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1406 'license': 'Creative Commons Attribution license (reuse allowed)',
1407 },
1408 'params': {
1409 'skip_download': True,
1410 },
1411 },
1412 {
1413 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1414 'only_matching': True,
1415 },
1416 {
1417 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1418 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1419 'only_matching': True,
1420 },
1421 {
1422 # Rental video preview
1423 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1424 'info_dict': {
1425 'id': 'uGpuVWrhIzE',
1426 'ext': 'mp4',
1427 'title': 'Piku - Trailer',
1428 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1429 'upload_date': '20150811',
1430 'uploader': 'FlixMatrix',
1431 'uploader_id': 'FlixMatrixKaravan',
1432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1433 'license': 'Standard YouTube License',
1434 },
1435 'params': {
1436 'skip_download': True,
1437 },
1438 'skip': 'This video is not available.',
1439 },
1440 {
1441 # YouTube Red video with episode data
1442 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1443 'info_dict': {
1444 'id': 'iqKdEhx-dD4',
1445 'ext': 'mp4',
1446 'title': 'Isolation - Mind Field (Ep 1)',
1447 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1448 'duration': 2085,
1449 'upload_date': '20170118',
1450 'uploader': 'Vsauce',
1451 'uploader_id': 'Vsauce',
1452 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1453 'series': 'Mind Field',
1454 'season_number': 1,
1455 'episode_number': 1,
1456 },
1457 'params': {
1458 'skip_download': True,
1459 },
1460 'expected_warnings': [
1461 'Skipping DASH manifest',
1462 ],
1463 },
1464 {
1465 # The following content has been identified by the YouTube community
1466 # as inappropriate or offensive to some audiences.
1467 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1468 'info_dict': {
1469 'id': '6SJNVb0GnPI',
1470 'ext': 'mp4',
1471 'title': 'Race Differences in Intelligence',
1472 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1473 'duration': 965,
1474 'upload_date': '20140124',
1475 'uploader': 'New Century Foundation',
1476 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1477 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1478 },
1479 'params': {
1480 'skip_download': True,
1481 },
1482 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1483 },
1484 {
1485 # itag 212
1486 'url': '1t24XAntNCY',
1487 'only_matching': True,
1488 },
1489 {
1490 # geo restricted to JP
1491 'url': 'sJL6WA-aGkQ',
1492 'only_matching': True,
1493 },
1494 {
1495 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1496 'only_matching': True,
1497 },
1498 {
1499 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1500 'only_matching': True,
1501 },
1502 {
1503 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1504 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1505 'only_matching': True,
1506 },
1507 {
1508 # DRM protected
1509 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1510 'only_matching': True,
1511 },
1512 {
1513 # Video with unsupported adaptive stream type formats
1514 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1515 'info_dict': {
1516 'id': 'Z4Vy8R84T1U',
1517 'ext': 'mp4',
1518 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1519 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1520 'duration': 433,
1521 'upload_date': '20130923',
1522 'uploader': 'Amelia Putri Harwita',
1523 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1524 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1525 'formats': 'maxcount:10',
1526 },
1527 'params': {
1528 'skip_download': True,
1529 'youtube_include_dash_manifest': False,
1530 },
1531 'skip': 'not actual anymore',
1532 },
1533 {
1534 # Youtube Music Auto-generated description
1535 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1536 'info_dict': {
1537 'id': 'MgNrAu2pzNs',
1538 'ext': 'mp4',
1539 'title': 'Voyeur Girl',
1540 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1541 'upload_date': '20190312',
1542 'uploader': 'Stephen - Topic',
1543 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1544 'artist': 'Stephen',
1545 'track': 'Voyeur Girl',
1546 'album': 'it\'s too much love to know my dear',
1547 'release_date': '20190313',
1548 'release_year': 2019,
1549 },
1550 'params': {
1551 'skip_download': True,
1552 },
1553 },
1554 {
1555 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1556 'only_matching': True,
1557 },
1558 {
1559 # invalid -> valid video id redirection
1560 'url': 'DJztXj2GPfl',
1561 'info_dict': {
1562 'id': 'DJztXj2GPfk',
1563 'ext': 'mp4',
1564 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1565 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1566 'upload_date': '20090125',
1567 'uploader': 'Prochorowka',
1568 'uploader_id': 'Prochorowka',
1569 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1570 'artist': 'Panjabi MC',
1571 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1572 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1573 },
1574 'params': {
1575 'skip_download': True,
1576 },
1577 'skip': 'Video unavailable',
1578 },
1579 {
1580 # empty description results in an empty string
1581 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1582 'info_dict': {
1583 'id': 'x41yOUIvK2k',
1584 'ext': 'mp4',
1585 'title': 'IMG 3456',
1586 'description': '',
1587 'upload_date': '20170613',
1588 'uploader_id': 'ElevageOrVert',
1589 'uploader': 'ElevageOrVert',
1590 },
1591 'params': {
1592 'skip_download': True,
1593 },
1594 },
1595 {
1596 # with '};' inside yt initial data (see [1])
1597 # see [2] for an example with '};' inside ytInitialPlayerResponse
1598 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1599 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1600 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1601 'info_dict': {
1602 'id': 'CHqg6qOn4no',
1603 'ext': 'mp4',
1604 'title': 'Part 77 Sort a list of simple types in c#',
1605 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1606 'upload_date': '20130831',
1607 'uploader_id': 'kudvenkat',
1608 'uploader': 'kudvenkat',
1609 },
1610 'params': {
1611 'skip_download': True,
1612 },
1613 },
1614 {
1615 # another example of '};' in ytInitialData
1616 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1617 'only_matching': True,
1618 },
1619 {
1620 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1621 'only_matching': True,
1622 },
1623 {
1624 # https://github.com/ytdl-org/youtube-dl/pull/28094
1625 'url': 'OtqTfy26tG0',
1626 'info_dict': {
1627 'id': 'OtqTfy26tG0',
1628 'ext': 'mp4',
1629 'title': 'Burn Out',
1630 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1631 'upload_date': '20141120',
1632 'uploader': 'The Cinematic Orchestra - Topic',
1633 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1634 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1635 'artist': 'The Cinematic Orchestra',
1636 'track': 'Burn Out',
1637 'album': 'Every Day',
1638 'release_data': None,
1639 'release_year': None,
1640 },
1641 'params': {
1642 'skip_download': True,
1643 },
1644 },
1645 {
1646 # controversial video, only works with bpctr when authenticated with cookies
1647 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1648 'only_matching': True,
1649 },
1650 {
1651 # controversial video, requires bpctr/contentCheckOk
1652 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1653 'info_dict': {
1654 'id': 'SZJvDhaSDnc',
1655 'ext': 'mp4',
1656 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1657 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1658 'uploader': 'CBS This Morning',
1659 'uploader_id': 'CBSThisMorning',
1660 'upload_date': '20140716',
1661 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1662 }
1663 },
1664 {
1665 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1666 'url': 'cBvYw8_A0vQ',
1667 'info_dict': {
1668 'id': 'cBvYw8_A0vQ',
1669 'ext': 'mp4',
1670 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1671 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1672 'upload_date': '20201120',
1673 'uploader': 'Walk around Japan',
1674 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1675 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1676 },
1677 'params': {
1678 'skip_download': True,
1679 },
1680 }, {
1681 # Has multiple audio streams
1682 'url': 'WaOKSUlf4TM',
1683 'only_matching': True
1684 }, {
1685 # Requires Premium: has format 141 when requested using YTM url
1686 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1687 'only_matching': True
1688 }, {
1689 # multiple subtitles with same lang_code
1690 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1691 'only_matching': True,
1692 }, {
1693 # Force use android client fallback
1694 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1695 'info_dict': {
1696 'id': 'YOelRv7fMxY',
1697 'title': 'DIGGING A SECRET TUNNEL Part 1',
1698 'ext': '3gp',
1699 'upload_date': '20210624',
1700 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1701 'uploader': 'colinfurze',
1702 'uploader_id': 'colinfurze',
1703 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1704 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1705 },
1706 'params': {
1707 'format': '17', # 3gp format available on android
1708 'extractor_args': {'youtube': {'player_client': ['android']}},
1709 },
1710 },
1711 {
1712 # Skip download of additional client configs (remix client config in this case)
1713 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1714 'only_matching': True,
1715 'params': {
1716 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1717 },
1718 }, {
1719 # shorts
1720 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1721 'only_matching': True,
1722 }, {
1723 'note': 'Storyboards',
1724 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1725 'info_dict': {
1726 'id': '5KLPxDtMqe8',
1727 'ext': 'mhtml',
1728 'format_id': 'sb0',
1729 'title': 'Your Brain is Plastic',
1730 'uploader_id': 'scishow',
1731 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1732 'upload_date': '20140324',
1733 'uploader': 'SciShow',
1734 }, 'params': {'format': 'mhtml', 'skip_download': True}
1735 }
1736 ]
1737
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose first ``list`` query value is non-empty is rejected here;
    every other URL is decided by the parent class's ``suitable``.
    """
    from ..utils import parse_qs

    first_list_value = parse_qs(url).get('list', [None])[0]
    if first_list_value:
        return False
    return super(YoutubeIE, cls).suitable(url)
1746
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _player_cache holds signature/nsig functions and decrypted nsig values;
    # _code_cache maps a player id to the downloaded player code.
    self._player_cache = {}
    self._code_cache = {}
1751
1752 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
1753 EXPIRATION_DURATION = 18_000
1754 lock = threading.Lock()
1755
1756 is_live = True
1757 expiration_time = time.time() + EXPIRATION_DURATION
1758 formats = [f for f in formats if f.get('is_from_start')]
1759
1760 def refetch_manifest(format_id):
1761 nonlocal formats, expiration_time, is_live
1762 if time.time() <= expiration_time:
1763 return
1764
1765 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
1766 video_details = traverse_obj(
1767 prs, (..., 'videoDetails'), expected_type=dict, default=[])
1768 microformats = traverse_obj(
1769 prs, (..., 'microformat', 'playerMicroformatRenderer'),
1770 expected_type=dict, default=[])
1771 _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
1772 expiration_time = time.time() + EXPIRATION_DURATION
1773
1774 def mpd_feed(format_id):
1775 """
1776 @returns (manifest_url, manifest_stream_number, is_live) or None
1777 """
1778 with lock:
1779 refetch_manifest(format_id)
1780
1781 f = next((f for f in formats if f['format_id'] == format_id), None)
1782 if not f:
1783 self.report_warning(
1784 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
1785 return None
1786 return f['manifest_url'], f['manifest_stream_number'], is_live
1787
1788 for f in formats:
1789 f['protocol'] = 'http_dash_segments_generator'
1790 f['fragments'] = functools.partial(
1791 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
1792
def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """Generate fragment dicts for a live DASH format while it stays live.

    Yields dicts of the form ``{'url': fragment_url}``.  The newest
    sequence number is read either from the MPD (through ``mpd_feed``) or
    from the ``X-Head-Seqnum`` header of the last yielded segment.  The
    generator ends when the stream is no longer reported live or when
    ``no_fragment_score`` exceeds 30.

    @param format_id        format id handed to ``mpd_feed``
    @param live_start_time  start time of the stream, or a falsy value
    @param mpd_feed         callable returning
                            (manifest_url, manifest_stream_number, is_live) or None
    @param ctx              dict; its ``start`` key (if truthy) is used as
                            the download start time
    """
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Must be bound before the helper below can run: the helper reads it as a
    # free variable, and a failed first MPD fetch would otherwise raise
    # NameError instead of being retried.
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence):
        """Return (ok, last sequence number), refreshing the fragment info from the MPD."""
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        mpd_url, stream_number, is_live = mpd_feed(format_id) or (mpd_url, stream_number, False)
        if old_mpd_url == mpd_url and not refresh_sequence:
            return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 1
            return False, last_seq
        fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 1
                last_segment_url = None
                continue
        else:
            should_retry, last_seq = _extract_sequence_from_mpd(True)
            if not should_retry:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        # Make last_seq exclusive so it can be used as a range() bound
        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_retry, _ = _extract_sequence_from_mpd(False)
                if not should_retry:
                    # retry when it gets weird state
                    known_idx = idx - 1
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        # Wait out the remainder of the FETCH_SPAN-second polling interval
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
1887
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in *ytcfgs*, or None.

    The URL is looked up under ``PLAYER_JS_URL`` and then under
    ``WEB_PLAYER_CONTEXT_CONFIGS -> * -> jsUrl``.  Protocol-relative URLs
    get an ``https:`` prefix; other non-http(s) URLs are joined onto
    ``https://www.youtube.com``.  ``webpage`` is accepted but not used.
    """
    player_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if not player_url:
        return
    if player_url.startswith('//'):
        return 'https:' + player_url
    if re.match(r'https?://', player_url):
        return player_url
    return compat_urlparse.urljoin('https://www.youtube.com', player_url)
1900
1901 def _download_player_url(self, video_id, fatal=False):
1902 res = self._download_webpage(
1903 'https://www.youtube.com/iframe_api',
1904 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1905 if res:
1906 player_version = self._search_regex(
1907 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1908 if player_version:
1909 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1910
1911 def _signature_cache_id(self, example_sig):
1912 """ Return a string representation of a signature """
1913 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1914
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern found in *player_url*.

    Raises ExtractorError when none of the patterns matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
1924
1925 def _load_player(self, video_id, player_url, fatal=True):
1926 player_id = self._extract_player_info(player_url)
1927 if player_id not in self._code_cache:
1928 code = self._download_webpage(
1929 player_url, video_id, fatal=fatal,
1930 note='Downloading player ' + player_id,
1931 errnote='Download of %s failed' % player_url)
1932 if code:
1933 self._code_cache[player_id] = code
1934 return self._code_cache.get(player_id)
1935
1936 def _extract_signature_function(self, video_id, player_url, example_sig):
1937 player_id = self._extract_player_info(player_url)
1938
1939 # Read from filesystem cache
1940 func_id = 'js_%s_%s' % (
1941 player_id, self._signature_cache_id(example_sig))
1942 assert os.path.basename(func_id) == func_id
1943
1944 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1945 if cache_spec is not None:
1946 return lambda s: ''.join(s[i] for i in cache_spec)
1947
1948 code = self._load_player(video_id, player_url)
1949 if code:
1950 res = self._parse_sig_js(code)
1951
1952 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1953 cache_res = res(test_string)
1954 cache_spec = [ord(c) for c in cache_res]
1955
1956 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1957 return res
1958
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    Does nothing unless the ``youtube_print_sig_code`` param is set.  The
    code is derived by running *func* over a string of distinct characters
    and expressing the resulting index sequence as slices of ``s``.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            stop = end + step
            start_part = str(start) if start != 0 else ''
            end_part = (':%d' % stop) if stop >= 0 else ':'
            step_part = (':%d' % step) if step != 1 else ''
            return 's[%s%s%s]' % (start_part, end_part, step_part)

        step = None
        # Placeholder only: start is always rebound whenever step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            delta = i - prev
            if step is None:
                if delta in (-1, 1):
                    # Start of a run of consecutive indices
                    step, start = delta, prev
                else:
                    yield 's[%d]' % prev
            elif delta != step:
                # The current run ended at prev
                yield _genslice(start, prev, step)
                step = None
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
2000
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return it as a Python callable.

    The function name is found with the first matching pattern below; the
    returned callable takes the signature string as its only argument.
    """
    signature_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        signature_name_patterns,
        jscode, 'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    signature_function = interpreter.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: signature_function([s])
2024
2025 def _decrypt_signature(self, s, video_id, player_url):
2026 """Turn the encrypted s field into a working signature"""
2027
2028 if player_url is None:
2029 raise ExtractorError('Cannot decrypt signature without player_url')
2030
2031 try:
2032 player_id = (player_url, self._signature_cache_id(s))
2033 if player_id not in self._player_cache:
2034 func = self._extract_signature_function(
2035 video_id, player_url, s
2036 )
2037 self._player_cache[player_id] = func
2038 func = self._player_cache[player_id]
2039 self._print_sig_code(func, s)
2040 return func(s)
2041 except Exception as e:
2042 raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
2043
2044 def _decrypt_nsig(self, s, video_id, player_url):
2045 """Turn the encrypted n field into a working signature"""
2046 if player_url is None:
2047 raise ExtractorError('Cannot decrypt nsig without player_url')
2048 if player_url.startswith('//'):
2049 player_url = 'https:' + player_url
2050 elif not re.match(r'https?://', player_url):
2051 player_url = compat_urlparse.urljoin(
2052 'https://www.youtube.com', player_url)
2053
2054 sig_id = ('nsig_value', s)
2055 if sig_id in self._player_cache:
2056 return self._player_cache[sig_id]
2057
2058 try:
2059 player_id = ('nsig', player_url)
2060 if player_id not in self._player_cache:
2061 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
2062 func = self._player_cache[player_id]
2063 self._player_cache[sig_id] = func(s)
2064 self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
2065 return self._player_cache[sig_id]
2066 except Exception as e:
2067 raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
2068
2069 def _extract_n_function_name(self, jscode):
2070 return self._search_regex(
2071 (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
2072 jscode, 'Initial JS player n function name', group='nfunc')
2073
def _extract_n_function(self, video_id, player_url):
    """Return a callable that applies the player's n function to a string.

    The function code is loaded from the ``youtube-nsig`` cache section when
    present; otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        player_js = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(player_js)
        jsi = JSInterpreter(player_js)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def apply_n_function(s):
        # The function is rebuilt from its code on every call
        return jsi.extract_function_from_code(*func_code)([s])

    return apply_n_function
2091
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The ``STS`` value of *ytcfg* is used when it is truthy; otherwise the
    value is searched for in the player code.  Without a player_url this
    raises ExtractorError if *fatal*, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if code:
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts
2115
def _mark_watched(self, video_id, player_responses):
    """Request the playback tracking URL so that the video is marked as watched.

    The URL is taken from ``playbackTracking.videostatsPlaybackUrl.baseUrl``
    of the player responses; a warning is reported when it is missing.  The
    request itself is non-fatal.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # Draw 16 characters uniformly. The previous randint(0, 256) & 63 included
    # 256 (randint is inclusive), which slightly favoured the first character.
    cpn = ''.join(random.choices(CPN_ALPHABET, k=16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2141
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embeds found in *webpage* as a list.

    The list holds embed URLs of the regular player markup, followed by the
    ``data-youtube-id`` values of lazyYT embeds and the ``data-video_id``
    values of the Wordpress "YouTube Video Importer" plugin.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read ``embedSWF\(?:\s*`` which
    # demands a literal ":" and therefore never matched ``embedSWF("...")``.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the video id is its last group
    entries.extend(m[-1] for m in matches)

    return entries
2173
@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``_extract_urls(webpage)``, or None when there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
2178
@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of ``_VALID_URL`` matched against *url*.

    Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
2185
def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar found in *data*."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        # timeRangeStartMillis is scaled down by 1000
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)
2200
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the first non-empty chapter list built from the engagement panels in *data*.

    Returns an empty list when no panel yields any chapter.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    # Panels are evaluated one at a time; the first truthy result wins
    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2216
2217 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2218 chapters = []
2219 last_chapter = {'start_time': 0}
2220 for idx, chapter in enumerate(chapter_list or []):
2221 title = chapter_title(chapter)
2222 start_time = chapter_time(chapter)
2223 if start_time is None:
2224 continue
2225 last_chapter['end_time'] = start_time
2226 if start_time < last_chapter['start_time']:
2227 if idx == 1:
2228 chapters.pop()
2229 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2230 else:
2231 self.report_warning(f'Invalid start time for chapter "{title}"')
2232 continue
2233 last_chapter = {'start_time': start_time, 'title': title}
2234 chapters.append(last_chapter)
2235 last_chapter['end_time'] = duration
2236 return chapters
2237
2238 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2239 return self._parse_json(self._search_regex(
2240 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2241 regex), webpage, name, default='{}'), video_id, fatal=False)
2242
2243 def _extract_comment(self, comment_renderer, parent=None):
2244 comment_id = comment_renderer.get('commentId')
2245 if not comment_id:
2246 return
2247
2248 text = self._get_text(comment_renderer, 'contentText')
2249
2250 # note: timestamp is an estimate calculated from the current time and time_text
2251 timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
2252 author = self._get_text(comment_renderer, 'authorText')
2253 author_id = try_get(comment_renderer,
2254 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2255
2256 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2257 lambda x: x['likeCount']), compat_str)) or 0
2258 author_thumbnail = try_get(comment_renderer,
2259 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2260
2261 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2262 is_favorited = 'creatorHeart' in (try_get(
2263 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2264 return {
2265 'id': comment_id,
2266 'text': text,
2267 'timestamp': timestamp,
2268 'time_text': time_text,
2269 'like_count': votes,
2270 'is_favorited': is_favorited,
2271 'author': author,
2272 'author_id': author_id,
2273 'author_thumbnail': author_thumbnail,
2274 'author_is_uploader': author_is_uploader,
2275 'parent': parent or 'root'
2276 }
2277
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following the comment continuations.

    Called without *parent* for the top-level comment section and, from
    within itself, with the parent comment id for each reply thread.
    *tracker* is the dict of running counters shared by those nested calls.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        # Returns the continuation of the requested sort order, if any
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            # 1 = new, 0 = top
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = (
                self._extract_continuation_ep_data(sort_continuation_ep)
                or self._extract_continuation(sort_menu_item))
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the consumer below to stop
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            counter_key = 'total_reply_comments' if parent else 'total_parent_comments'
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not comment_replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            comment_entries_iter = self._comment_entries(
                comment_replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            reply_limit = min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))
            yield from itertools.islice(comment_entries_iter, reply_limit)

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Pad with empty strings so that missing limits fall back to sys.maxsize
    limit_args = self._configuration_arg('max_comments', ) + [''] * 4
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(limit, default=sys.maxsize) for limit in limit_args)

    continuation = self._extract_continuation(root_continuation_data)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    response = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints')

        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section only carries the header / sort menu
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break
2414
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns an iterator over the comments of the ``comment-item-section``
    found in *contents*, limited to the first ``max_comments`` argument value.
    """
    def _real_comment_extract(contents):
        comment_section = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                comment_section = item
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    first_limit = self._configuration_arg('max_comments', [''])[0]
    max_comments = int_or_none(first_limit)
    return itertools.islice(_real_comment_extract(contents), 0, max_comments)
2425
2426 @staticmethod
2427 def _get_checkok_params():
2428 return {'contentCheckOk': True, 'racyCheckOk': True}
2429
2430 @classmethod
2431 def _generate_player_context(cls, sts=None):
2432 context = {
2433 'html5Preference': 'HTML5_PREF_WANTS',
2434 }
2435 if sts is not None:
2436 context['signatureTimestamp'] = sts
2437 return {
2438 'playbackContext': {
2439 'contentPlaybackContext': context
2440 },
2441 **cls._get_checkok_params()
2442 }
2443
@staticmethod
def _is_agegated(player_response):
    """Return True when the playability status of *player_response* indicates an age gate.

    This is the case when ``desktopLegacyAgeGateReason`` is truthy, or when
    the status or reason contains one of the known age-gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False
2455
@staticmethod
def _is_unplayable(player_response):
    """Return True when the playability status of *player_response* is ``UNPLAYABLE``."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
2459
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the ``player`` endpoint for *client* and return its response, or None if it is falsy.

    The signature timestamp is only looked up when a player_url is given.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id, **self._generate_player_context(sts)}
    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    player_response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note=note)
    return player_response or None
2476
def _get_requested_clients(self, url, smuggled_data):
    """Return the ordered, de-duplicated list of player clients to query.

    Clients come from the ``player_client`` extractor argument.  ``default``
    and ``all`` expand to the default and to every allowed client; unknown
    names are skipped with a warning.  Without any valid request the
    defaults are used.  For music URLs the ``<client>_music`` variant of
    each requested client is appended when it exists in INNERTUBE_CLIENTS.
    """
    requested_clients = []
    default = ['android', 'web']
    # Clients whose name starts with "_" cannot be requested; the rest are
    # ordered by descending priority.
    allowed_clients = sorted(
        [client for client in INNERTUBE_CLIENTS if client[:1] != '_'],
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that later appends never touch the defaults list
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending the list with a generator
        # that iterates the same list would also visit the new entries.
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
2500
2501 def _extract_player_ytcfg(self, client, video_id):
2502 url = {
2503 'web_music': 'https://music.youtube.com',
2504 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2505 }.get(client)
2506 if not url:
2507 return {}
2508 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2509 return self.extract_ytcfg(video_id, webpage) or {}
2510
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for every requested client.

    Clients are processed in the given order; age-gate/creator fallback
    clients discovered along the way are tried immediately after the client
    that triggered them. Extraction errors are reported as warnings, except
    that the last error is raised when no response at all was collected.

    Returns a tuple `(player_responses, player_url)`.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    requested = clients
    # Used as a stack: the next client to try is popped from the end
    pending = clients[::-1]
    responses = []

    def append_client(client_name):
        if client_name not in INNERTUBE_CLIENTS:
            return
        if client_name in requested:
            return
        pending.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        responses.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client = pending.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                # The webpage already carried the web client's response
                response = initial_pr
            else:
                response = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as err:
            # Only the most recent error is kept; older ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = err
            continue

        if response:
            responses.append(response)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(response) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(response):
            append_client(f'{client}_agegate')

    if last_error:
        if not responses:
            raise last_error
        self.report_warning(last_error)
    return responses, player_url
2577
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Yield format dicts built from the given `streamingData` objects.

    First yields the directly listed `formats`/`adaptiveFormats` (https),
    deciphering signatures and the `n` parameter when needed, then the
    formats found in the HLS and DASH manifests, subject to the `skip`
    and `include_live_dash` extractor arguments and the
    `youtube_include_hls_manifest` / `live_from_start` params.

    @param streaming_data  list of `streamingData` dicts
    @param player_url      URL of the JS player; needed for deciphering
    @param is_live         whether the video is currently live
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # `quality` is None when the format carries neither
                # `quality` nor `audioQuality`; do not crash on it
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Work on a copy so that appending below cannot alter the list owned by
    # the configuration helper
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format_id/quality to a manifest format; False if it is a duplicate."""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already seen over another protocol: disambiguate
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
2730
def _extract_storyboard(self, player_responses, duration):
    """Yield one 'mhtml' storyboard format per level of the storyboard spec.

    The spec is a '|'-separated string: the first field is the base URL
    template and each following field describes one storyboard level as
    eight '#'-separated values. Malformed levels are skipped with a warning.

    Nothing is yielded when there is no storyboard level or when `duration`
    is None, since the per-fragment durations cannot be computed then.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list was reversed, so the base URL is now the last element.
    # NB: ''.split('|') is [''], so the list is never empty at this point
    base_url = spec.pop()
    if not spec or duration is None:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a cols x rows grid of frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'path': url.replace('$M', str(j)),
                # The last fragment may cover less than a full fragment
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
2765
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    Returns a tuple `(webpage, master_ytcfg, player_responses, player_url)`;
    `webpage` is None when 'webpage' is listed in the `player_skip` argument.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    if skip_webpage:
        webpage = None
    else:
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    # Fall back to the built-in default config when the page yields none
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url
2779
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
    """Determine the live state and extract the formats.

    Returns a tuple `(live_broadcast_details, is_live, streaming_data, formats)`.
    """
    broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    # Prefer videoDetails; consult the broadcast details only when it is silent
    live_now = get_first(video_details, 'isLive')
    if live_now is None:
        live_now = get_first(broadcast_details, 'isLiveNow')

    all_streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    format_list = [*self._extract_formats(all_streaming_data, video_id, player_url, live_now)]

    return broadcast_details, live_now, all_streaming_data, format_list
2790
def _real_extract(self, url):
    """Extract the info dict for a single YouTube video URL.

    Downloads the watch page and player responses, then assembles formats,
    thumbnails, subtitles/automatic captions, music metadata, chapters,
    like/dislike counts, availability and live status.

    Returns an info dict, or a url/playlist result for trailers and
    multifeed videos.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(
                    compat_urllib_parse_unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], compat_str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # `reason` may be missing even though a subreason is present
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break

    thumbnails = []
    thumbnail_dicts = traverse_obj(
        (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
        expected_type=dict, default=[])
    for thumbnail in thumbnail_dicts:
        thumbnail_url = thumbnail.get('url')
        if not thumbnail_url:
            continue
        # Sometimes youtube gives a wrong thumbnail URL. See:
        # https://github.com/yt-dlp/yt-dlp/issues/233
        # https://github.com/ytdl-org/youtube-dl/issues/28023
        if 'maxresdefault' in thumbnail_url:
            thumbnail_url = thumbnail_url.split('?')[0]
        thumbnails.append({
            'url': thumbnail_url,
            'height': int_or_none(thumbnail.get('height')),
            'width': int_or_none(thumbnail.get('width')),
        })
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    if is_live and self.get_param('live_from_start'):
        self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

    formats.extend(self._extract_storyboard(player_responses, duration))

    # Source is given priority since formats that throttle are given lower source_preference
    # When throttling issue is fully fixed, remove this
    self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_start_time,
    }

    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        subtitles, automatic_captions = {}, {}
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                # The `tlang` query parameter must stay the plain translation
                # language; only the key used in the result is made unique
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr':
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, template=' from %s')
                process_language(
                    automatic_captions, base_url, trans_code, trans_name, {'tlang': orig_trans_code})
        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # Start/end times may be given in either the URL fragment or the query
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # Strip the matched album text (not the group name)
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info.setdefault('subtitles', {})['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Always bound, even without initial data, since it is passed to
    # extract_comments() at the end
    contents = try_get(
        initial_data,
        lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
        list) or []

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line between rows means several songs are listed,
                # in which case no single album/artist/track is recorded
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
3255
3256
3257class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
3258
def _extract_channel_id(self, webpage):
    """Return the channel id found in `webpage`.

    The `channelId` meta tag is preferred; otherwise the id is taken from
    the `/channel/<id>` part of one of the page's URL meta tags.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier must be inside the group: a repeated group only keeps
    # its last repetition, i.e. the final character of the id
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
3271
3272 @staticmethod
3273 def _extract_basic_item_renderer(item):
3274 # Modified from _extract_grid_item_renderer
3275 known_basic_renderers = (
3276 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3277 )
3278 for key, renderer in item.items():
3279 if not isinstance(renderer, dict):
3280 continue
3281 elif key in known_basic_renderers:
3282 return renderer
3283 elif key.startswith('grid') and key.endswith('Renderer'):
3284 return renderer
3285
def _grid_entries(self, grid_renderer):
    """
    Yield one result per supported item of a grid-style renderer.

    Each item is resolved, in order of precedence, as a playlist link, a
    video (via _extract_video), a channel link, or - failing those - a link
    built from the item's navigation endpoint URL when one of the YouTube
    extractors accepts it. Items matching none of these are skipped.
    @param grid_renderer: dict whose 'items' list is iterated
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue
        title = self._get_text(item_renderer, 'title')
        playlist_id, video_id, channel_id = (
            item_renderer.get(key) for key in ('playlistId', 'videoId', 'channelId'))

        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            yield self._extract_video(item_renderer)
        elif channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_url = urljoin('https://www.youtube.com/', try_get(
                item_renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if not endpoint_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(endpoint_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    endpoint_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(endpoint_url), video_title=title)
3325
3326 def _shelf_entries_from_content(self, shelf_renderer):
3327 content = shelf_renderer.get('content')
3328 if not isinstance(content, dict):
3329 return
3330 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3331 if renderer:
3332 # TODO: add support for nested playlists so each shelf is processed
3333 # as separate playlist
3334 # TODO: this includes only first N items
3335 for entry in self._grid_entries(renderer):
3336 yield entry
3337 renderer = content.get('horizontalListRenderer')
3338 if renderer:
3339 # TODO
3340 pass
3341
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """
    Yield the entries of a shelf renderer.

    When the shelf has an endpoint URL, a link to it is yielded first; the
    entries embedded in the shelf content are yielded afterwards in any case.
    @param shelf_renderer: dict of the shelf renderer
    @param skip_channels: when true, a shelf whose URL contains '/channels?'
                          yields nothing at all
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
3358
3359 def _playlist_entries(self, video_list_renderer):
3360 for content in video_list_renderer['contents']:
3361 if not isinstance(content, dict):
3362 continue
3363 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3364 if not isinstance(renderer, dict):
3365 continue
3366 video_id = renderer.get('videoId')
3367 if not video_id:
3368 continue
3369 yield self._extract_video(renderer)
3370
def _rich_entries(self, rich_grid_renderer):
    """
    Yield the video entry wrapped in a rich item renderer, if there is one.

    Nothing is yielded unless content.videoRenderer is a dict with a 'videoId'.
    @param rich_grid_renderer: dict of the rich item renderer
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3378
3379 def _video_entry(self, video_renderer):
3380 video_id = video_renderer.get('videoId')
3381 if video_id:
3382 return self._extract_video(video_renderer)
3383
def _post_thread_entries(self, post_thread_renderer):
    """
    Yield the entries referenced by a single community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every link in the post text that YoutubeIE accepts, except links
    to the attached video itself.
    @param post_thread_renderer: dict of the post thread renderer
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    attached_entry = self._extract_video(attached_video) if video_id else None
    if attached_entry:
        yield attached_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        linked_video_id = YoutubeIE._match_id(link_url)
        # The attached video has already been handled above
        if linked_video_id != video_id:
            yield self.url_result(link_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
3419
3420 def _post_thread_continuation_entries(self, post_thread_continuation):
3421 contents = post_thread_continuation.get('contents')
3422 if not isinstance(contents, list):
3423 return
3424 for content in contents:
3425 renderer = content.get('backstagePostThreadRenderer')
3426 if not isinstance(renderer, dict):
3427 continue
3428 for entry in self._post_thread_entries(renderer):
3429 yield entry
3430
3431 r''' # unused
3432 def _rich_grid_entries(self, contents):
3433 for content in contents:
3434 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3435 if video_renderer:
3436 entry = self._video_entry(video_renderer)
3437 if entry:
3438 yield entry
3439 '''
def _extract_entries(self, parent_renderer, continuation_list):
    """
    Yield the entries found in parent_renderer['contents'] and report the
    continuation through continuation_list.

    @param parent_renderer: dict whose 'contents' list is walked; each item is
                            either an 'itemSectionRenderer' (whose own
                            contents are dispatched by renderer name) or a
                            'richItemRenderer'
    @param continuation_list: single-element list used as an output
                              parameter; on completion its only element is
                              the last continuation found (or whatever
                              _extract_continuation returned for the parent)
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
        if not is_renderer:
            # Not an item section: only rich items are supported here
            renderer = content.get('richItemRenderer')
            if renderer:
                for entry in self._rich_entries(renderer):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue
        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Maps a renderer name to a callable returning an iterable of entries
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'shelfRenderer': lambda x: self._shelf_entries(x),
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
            }
            # Only the first known renderer of each item is processed
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    # Falsy entries (e.g. _video_entry returning None) are dropped
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        # Fall back to the continuation of the item section itself
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # Last resort: the continuation of the parent renderer
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
3483
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield all entries of a tab, following continuations page by page.

    @param tab: dict of the selected tab; its 'content' holds either a
                'sectionListRenderer' or a 'richGridRenderer'
    @param item_id: identifier used in the per-page download notes
    @param ytcfg: passed to generate_api_headers and _extract_response
    @param account_syncid: passed to generate_api_headers
    @param visitor_data: initial visitor data; replaced by the value found in
                         each response when there is one
    """
    continuation_list = [None]
    # The lambda looks `continuation_list` up at call time, so it always
    # receives the list most recently bound to that name below
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # First response layout: entries live under 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            # Prefer the continuation reported by extract_entries (if it was used)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response layout: entries are appended through
        # 'appendContinuationItemsAction'. Each value pairs the handler with
        # the key under which the items are passed to it
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item decides how the whole page is handled
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Neither layout matched: stop paging
        break
3560
@staticmethod
def _extract_selected_tab(tabs):
    """
    Return the renderer of the tab marked as selected.

    @param tabs: iterable of tab dicts holding a 'tabRenderer' or an
                 'expandableTabRenderer'
    @raises ExtractorError: if no tab has 'selected' set to True
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
3569
@classmethod
def _extract_uploader(cls, data):
    """
    Return the uploader fields found in the playlist sidebar.

    @param data: response
    @returns: dict with those of 'uploader', 'uploader_id' and
              'uploader_url' that could be determined; empty when the
              sidebar names no video owner
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}
    candidates = (
        ('uploader', owner.get('text')),
        ('uploader_id', try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)),
        ('uploader_url', urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))),
    )
    return {field: value for field, value in candidates if value is not None}
3584
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for a tabbed page (channel tab or playlist).

    @param item_id: ID taken from the URL; used as playlist ID when the
                    metadata does not provide a channel ID
    @param ytcfg: passed on to the entry extraction
    @param data: response
    @param tabs: list of tab dicts, one of which must be selected
    @returns: the value of playlist_result for the selected tab's entries
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    thumbnails_list = []
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    # Channel metadata takes precedence; playlist metadata is the fallback
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Stays None for playlist metadata, in which case item_id is used below
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        # Channel avatar, else the playlist thumbnail from the sidebar
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only thumbnails that have a valid URL
    thumbnails = []
    for t in thumbnails_list:
        if not isinstance(t, dict):
            continue
        thumbnail_url = url_or_none(t.get('url'))
        if not thumbnail_url:
            continue
        thumbnails.append({
            'url': thumbnail_url,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        })
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    # Append the selected tab's 'title' and 'expandedText' fields
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')
    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    # Without channel metadata, take the uploader from the playlist sidebar
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the uploader_* fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id, ytcfg,
            self._extract_account_syncid(ytcfg, data),
            self._extract_visitor_data(data, ytcfg)),
        **metadata)
3659
3660 def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
3661 first_id = last_id = response = None
3662 for page_num in itertools.count(1):
3663 videos = list(self._playlist_entries(playlist))
3664 if not videos:
3665 return
3666 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3667 if start >= len(videos):
3668 return
3669 for video in videos[start:]:
3670 if video['id'] == first_id:
3671 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3672 return
3673 yield video
3674 first_id = first_id or videos[0]['id']
3675 last_id = videos[-1]['id']
3676 watch_endpoint = try_get(
3677 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
3678 headers = self.generate_api_headers(
3679 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3680 visitor_data=self._extract_visitor_data(response, data, ytcfg))
3681 query = {
3682 'playlistId': playlist_id,
3683 'videoId': watch_endpoint.get('videoId') or last_id,
3684 'index': watch_endpoint.get('index') or len(videos),
3685 'params': watch_endpoint.get('params') or 'OAE%3D'
3686 }
3687 response = self._extract_response(
3688 item_id='%s page %d' % (playlist_id, page_num),
3689 query=query, ep='next', headers=headers, ytcfg=ytcfg,
3690 check_get_keys='contents'
3691 )
3692 playlist = try_get(
3693 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3694
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist shown next to a video.

    @param item_id: fallback playlist ID
    @param url: URL that is being extracted
    @param data: response
    @param playlist: dict of the playlist panel
    @param ytcfg: passed on to the Mix extraction
    @returns: a link to the playlist's own URL when it has one that differs
              from url; otherwise a playlist result extracted as a Mix
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
3712
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its sidebar.
    Note: nothing is assumed to be public unless YouTube says so explicitly
    @param data: response
    """
    is_private = is_unlisted = None
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for dropdown_entry in dropdown_entries:
        if not try_get(dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if label:
            # The first selected entry that has a label wins
            badge_labels.add(label.lower())
            break

    for badge_label in badge_labels:
        if badge_label == 'public':
            is_unlisted = is_private = False
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
3744
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty sidebar renderer of the given name, or None.

    @param data: response
    @param info_renderer: key to look up in each playlist sidebar item
    @param expected_type: type the value must have to be considered
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((candidate for candidate in candidates if candidate), None)
3753
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again with unavailable videos included.

    The browse ID and params of the 'show unavailable videos' menu button
    are used when the button is present; otherwise built-in defaults are
    sent. The request is non-fatal.
    @param item_id: playlist ID
    @param data: response
    @param ytcfg: passed to generate_api_headers and _extract_response
    @returns: the API response, or None if the playlist has no primary
              sidebar renderer
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return None

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        label = try_get(nav_item, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
3789
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying on failure.

    One initial attempt plus up to 'extractor_retries' (default 3) retries
    are made. A retry happens after a network error (HTTP 403 and 429
    excepted) and after initial data that has neither 'contents' nor
    'currentVideoEndpoint'.
    @param url: URL to download
    @param item_id: ID used for the download notes
    @param fatal: if true, errors are raised once no retry applies;
                  otherwise they are reported as warnings
    @returns: tuple (webpage, data); either may be None/incomplete when
              fatal is false
    """
    retries = self.get_param('extractor_retries', 3)
    # Starts at -1 so that the first attempt is number 0
    count = -1
    webpage = data = last_error = None
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # HTTP 403 and 429 are not retried
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break
        else:
            # Alerts in the response are never retried
            try:
                self._extract_and_report_alerts(data)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                break

            # The data is considered complete when one of these keys is set
            if dict_get(data, ('contents', 'currentVideoEndpoint')):
                break

            last_error = 'Incomplete yt initial data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                break

    return webpage, data
3835
3836 def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
3837 data = None
3838 if 'webpage' not in self._configuration_arg('skip'):
3839 webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
3840 ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
3841 if not data:
3842 if not ytcfg and self.is_authenticated:
3843 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
3844 if 'authcheck' not in self._configuration_arg('skip') and fatal:
3845 raise ExtractorError(
3846 msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
3847 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
3848 expected=True)
3849 self.report_warning(msg, only_once=True)
3850 data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
3851 return data, ytcfg
3852
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve url through the API and fetch the data of the resolved endpoint.

    @param url: URL to resolve
    @param item_id: ID used for the download notes
    @param ytcfg: passed to generate_api_headers and _extract_response
    @param fatal: whether request and resolution failures are fatal
    @param default_client: client used for the API requests
    @returns: the response of the 'browse' or 'next' endpoint, or None if
              the URL resolves to neither and fatal is false
    @raises ExtractorError: if the URL resolves to neither and fatal is true
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
    # (key in the resolve response, API endpoint to query), in order of preference
    for endpoint_key, api_endpoint in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_endpoint, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint'))
    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)
3870
@staticmethod
def _smuggle_data(entries, data):
    """
    Yield every entry, with data smuggled into its 'url' when data is truthy.

    The entries are modified in place.
    @param entries: iterable of entry dicts having a 'url'
    @param data: data to smuggle into the URLs
    """
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
3877
3878 _SEARCH_PARAMS = None
3879
def _search_results(self, query, params=NO_DEFAULT):
    """
    Yield the results of a search, page by page, until no continuation is left.

    @param query: search query
    @param params: search 'params' value to send; defaults to _SEARCH_PARAMS
                   and is omitted from the request when falsy
    """
    search_query = {'query': query}
    effective_params = self._SEARCH_PARAMS if params is NO_DEFAULT else params
    if effective_params:
        search_query['params'] = effective_params

    # Single-element list through which _extract_entries reports the continuation
    continuation_holder = [None]
    page_num = 0
    while True:
        page_num += 1
        if continuation_holder[0]:
            search_query.update(continuation_holder[0])
        search = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=search_query,
            check_get_keys=('contents', 'onResponseReceivedCommands'))
        # First page, respectively continuation page layout
        slr_contents = try_get(
            search,
            (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
             lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
            list)
        yield from self._extract_entries({'contents': slr_contents}, continuation_holder)
        if not continuation_holder[0]:
            return
3900
3901
3902class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
3903 IE_DESC = 'YouTube Tabs'
3904 _VALID_URL = r'''(?x:
3905 https?://
3906 (?:\w+\.)?
3907 (?:
3908 youtube(?:kids)?\.com|
3909 %(invidious)s
3910 )/
3911 (?:
3912 (?P<channel_type>channel|c|user|browse)/|
3913 (?P<not_channel>
3914 feed/|hashtag/|
3915 (?:playlist|watch)\?.*?\blist=
3916 )|
3917 (?!(?:%(reserved_names)s)\b) # Direct URLs
3918 )
3919 (?P<id>[^/?\#&]+)
3920 )''' % {
3921 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3922 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3923 }
3924 IE_NAME = 'youtube:tab'
3925
3926 _TESTS = [{
3927 'note': 'playlists, multipage',
3928 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3929 'playlist_mincount': 94,
3930 'info_dict': {
3931 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3932 'title': 'Игорь Клейнер - Playlists',
3933 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3934 'uploader': 'Игорь Клейнер',
3935 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3936 },
3937 }, {
3938 'note': 'playlists, multipage, different order',
3939 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3940 'playlist_mincount': 94,
3941 'info_dict': {
3942 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3943 'title': 'Игорь Клейнер - Playlists',
3944 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3945 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3946 'uploader': 'Игорь Клейнер',
3947 },
3948 }, {
3949 'note': 'playlists, series',
3950 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3951 'playlist_mincount': 5,
3952 'info_dict': {
3953 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3954 'title': '3Blue1Brown - Playlists',
3955 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3956 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3957 'uploader': '3Blue1Brown',
3958 },
3959 }, {
3960 'note': 'playlists, singlepage',
3961 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3962 'playlist_mincount': 4,
3963 'info_dict': {
3964 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3965 'title': 'ThirstForScience - Playlists',
3966 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3967 'uploader': 'ThirstForScience',
3968 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3969 }
3970 }, {
3971 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3972 'only_matching': True,
3973 }, {
3974 'note': 'basic, single video playlist',
3975 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3976 'info_dict': {
3977 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3978 'uploader': 'Sergey M.',
3979 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3980 'title': 'youtube-dl public playlist',
3981 },
3982 'playlist_count': 1,
3983 }, {
3984 'note': 'empty playlist',
3985 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3986 'info_dict': {
3987 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3988 'uploader': 'Sergey M.',
3989 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3990 'title': 'youtube-dl empty playlist',
3991 },
3992 'playlist_count': 0,
3993 }, {
3994 'note': 'Home tab',
3995 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3996 'info_dict': {
3997 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3998 'title': 'lex will - Home',
3999 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4000 'uploader': 'lex will',
4001 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4002 },
4003 'playlist_mincount': 2,
4004 }, {
4005 'note': 'Videos tab',
4006 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4007 'info_dict': {
4008 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4009 'title': 'lex will - Videos',
4010 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4011 'uploader': 'lex will',
4012 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4013 },
4014 'playlist_mincount': 975,
4015 }, {
4016 'note': 'Videos tab, sorted by popular',
4017 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4018 'info_dict': {
4019 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4020 'title': 'lex will - Videos',
4021 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4022 'uploader': 'lex will',
4023 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4024 },
4025 'playlist_mincount': 199,
4026 }, {
4027 'note': 'Playlists tab',
4028 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4029 'info_dict': {
4030 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4031 'title': 'lex will - Playlists',
4032 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4033 'uploader': 'lex will',
4034 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4035 },
4036 'playlist_mincount': 17,
4037 }, {
4038 'note': 'Community tab',
4039 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4040 'info_dict': {
4041 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4042 'title': 'lex will - Community',
4043 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4044 'uploader': 'lex will',
4045 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4046 },
4047 'playlist_mincount': 18,
4048 }, {
4049 'note': 'Channels tab',
4050 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4051 'info_dict': {
4052 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4053 'title': 'lex will - Channels',
4054 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4055 'uploader': 'lex will',
4056 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4057 },
4058 'playlist_mincount': 12,
4059 }, {
4060 'note': 'Search tab',
4061 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4062 'playlist_mincount': 40,
4063 'info_dict': {
4064 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4065 'title': '3Blue1Brown - Search - linear algebra',
4066 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4067 'uploader': '3Blue1Brown',
4068 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4069 },
4070 }, {
4071 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4072 'only_matching': True,
4073 }, {
4074 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4075 'only_matching': True,
4076 }, {
4077 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4078 'only_matching': True,
4079 }, {
4080 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4081 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4082 'info_dict': {
4083 'title': '29C3: Not my department',
4084 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4085 'uploader': 'Christiaan008',
4086 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4087 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
4088 },
4089 'playlist_count': 96,
4090 }, {
4091 'note': 'Large playlist',
4092 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4093 'info_dict': {
4094 'title': 'Uploads from Cauchemar',
4095 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4096 'uploader': 'Cauchemar',
4097 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
4098 },
4099 'playlist_mincount': 1123,
4100 }, {
4101 'note': 'even larger playlist, 8832 videos',
4102 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4103 'only_matching': True,
4104 }, {
4105 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4106 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4107 'info_dict': {
4108 'title': 'Uploads from Interstellar Movie',
4109 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4110 'uploader': 'Interstellar Movie',
4111 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4112 },
4113 'playlist_mincount': 21,
4114 }, {
4115 'note': 'Playlist with "show unavailable videos" button',
4116 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
4117 'info_dict': {
4118 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
4119 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
4120 'uploader': 'Phim Siêu Nhân Nhật Bản',
4121 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
4122 },
4123 'playlist_mincount': 200,
4124 }, {
4125 'note': 'Playlist with unavailable videos in page 7',
4126 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
4127 'info_dict': {
4128 'title': 'Uploads from BlankTV',
4129 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
4130 'uploader': 'BlankTV',
4131 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
4132 },
4133 'playlist_mincount': 1000,
4134 }, {
4135 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
4136 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4137 'info_dict': {
4138 'title': 'Data Analysis with Dr Mike Pound',
4139 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4140 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4141 'uploader': 'Computerphile',
4142 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
4143 },
4144 'playlist_mincount': 11,
4145 }, {
4146 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4147 'only_matching': True,
4148 }, {
4149 'note': 'Playlist URL that does not actually serve a playlist',
4150 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
4151 'info_dict': {
4152 'id': 'FqZTN594JQw',
4153 'ext': 'webm',
4154 'title': "Smiley's People 01 detective, Adventure Series, Action",
4155 'uploader': 'STREEM',
4156 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
4157 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
4158 'upload_date': '20150526',
4159 'license': 'Standard YouTube License',
4160 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
4161 'categories': ['People & Blogs'],
4162 'tags': list,
4163 'view_count': int,
4164 'like_count': int,
4165 'dislike_count': int,
4166 },
4167 'params': {
4168 'skip_download': True,
4169 },
4170 'skip': 'This video is not available.',
4171 'add_ie': [YoutubeIE.ie_key()],
4172 }, {
4173 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4174 'only_matching': True,
4175 }, {
4176 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4177 'only_matching': True,
4178 }, {
4179 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4180 'info_dict': {
4181 'id': '3yImotZU3tw', # This will keep changing
4182 'ext': 'mp4',
4183 'title': compat_str,
4184 'uploader': 'Sky News',
4185 'uploader_id': 'skynews',
4186 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4187 'upload_date': r're:\d{8}',
4188 'description': compat_str,
4189 'categories': ['News & Politics'],
4190 'tags': list,
4191 'like_count': int,
4192 'dislike_count': int,
4193 },
4194 'params': {
4195 'skip_download': True,
4196 },
4197 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
4198 }, {
4199 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
4200 'info_dict': {
4201 'id': 'a48o2S1cPoo',
4202 'ext': 'mp4',
4203 'title': 'The Young Turks - Live Main Show',
4204 'uploader': 'The Young Turks',
4205 'uploader_id': 'TheYoungTurks',
4206 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
4207 'upload_date': '20150715',
4208 'license': 'Standard YouTube License',
4209 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
4210 'categories': ['News & Politics'],
4211 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
4212 'like_count': int,
4213 'dislike_count': int,
4214 },
4215 'params': {
4216 'skip_download': True,
4217 },
4218 'only_matching': True,
4219 }, {
4220 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
4221 'only_matching': True,
4222 }, {
4223 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
4224 'only_matching': True,
4225 }, {
4226 'note': 'A channel that is not live. Should raise error',
4227 'url': 'https://www.youtube.com/user/numberphile/live',
4228 'only_matching': True,
4229 }, {
4230 'url': 'https://www.youtube.com/feed/trending',
4231 'only_matching': True,
4232 }, {
4233 'url': 'https://www.youtube.com/feed/library',
4234 'only_matching': True,
4235 }, {
4236 'url': 'https://www.youtube.com/feed/history',
4237 'only_matching': True,
4238 }, {
4239 'url': 'https://www.youtube.com/feed/subscriptions',
4240 'only_matching': True,
4241 }, {
4242 'url': 'https://www.youtube.com/feed/watch_later',
4243 'only_matching': True,
4244 }, {
4245 'note': 'Recommended - redirects to home page.',
4246 'url': 'https://www.youtube.com/feed/recommended',
4247 'only_matching': True,
4248 }, {
4249 'note': 'inline playlist with not always working continuations',
4250 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
4251 'only_matching': True,
4252 }, {
4253 'url': 'https://www.youtube.com/course',
4254 'only_matching': True,
4255 }, {
4256 'url': 'https://www.youtube.com/zsecurity',
4257 'only_matching': True,
4258 }, {
4259 'url': 'http://www.youtube.com/NASAgovVideo/videos',
4260 'only_matching': True,
4261 }, {
4262 'url': 'https://www.youtube.com/TheYoungTurks/live',
4263 'only_matching': True,
4264 }, {
4265 'url': 'https://www.youtube.com/hashtag/cctv9',
4266 'info_dict': {
4267 'id': 'cctv9',
4268 'title': '#cctv9',
4269 },
4270 'playlist_mincount': 350,
4271 }, {
4272 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
4273 'only_matching': True,
4274 }, {
4275 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
4276 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4277 'only_matching': True
4278 }, {
4279 'note': '/browse/ should redirect to /channel/',
4280 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
4281 'only_matching': True
4282 }, {
4283 'note': 'VLPL, should redirect to playlist?list=PL...',
4284 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4285 'info_dict': {
4286 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4287 'uploader': 'NoCopyrightSounds',
4288 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
4289 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
4290 'title': 'NCS Releases',
4291 },
4292 'playlist_mincount': 166,
4293 }, {
4294 'note': 'Topic, should redirect to playlist?list=UU...',
4295 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4296 'info_dict': {
4297 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4298 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4299 'title': 'Uploads from Royalty Free Music - Topic',
4300 'uploader': 'Royalty Free Music - Topic',
4301 },
4302 'expected_warnings': [
4303 'A channel/user page was given',
4304 'The URL does not have a videos tab',
4305 ],
4306 'playlist_mincount': 101,
4307 }, {
4308 'note': 'Topic without a UU playlist',
4309 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
4310 'info_dict': {
4311 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
4312 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
4313 },
4314 'expected_warnings': [
4315 'A channel/user page was given',
4316 'The URL does not have a videos tab',
4317 'Falling back to channel URL',
4318 ],
4319 'playlist_mincount': 9,
4320 }, {
4321 'note': 'Youtube music Album',
4322 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
4323 'info_dict': {
4324 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
4325 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
4326 },
4327 'playlist_count': 50,
4328 }, {
4329 'note': 'unlisted single video playlist',
4330 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4331 'info_dict': {
4332 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
4333 'uploader': 'colethedj',
4334 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4335 'title': 'yt-dlp unlisted playlist test',
4336 'availability': 'unlisted'
4337 },
4338 'playlist_count': 1,
4339 }, {
4340 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
4341 'url': 'https://www.youtube.com/feed/recommended',
4342 'info_dict': {
4343 'id': 'recommended',
4344 'title': 'recommended',
4345 },
4346 'playlist_mincount': 50,
4347 'params': {
4348 'skip_download': True,
4349 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4350 },
4351 }, {
4352 'note': 'API Fallback: /videos tab, sorted by oldest first',
4353 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
4354 'info_dict': {
4355 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4356 'title': 'Cody\'sLab - Videos',
4357 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
4358 'uploader': 'Cody\'sLab',
4359 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4360 },
4361 'playlist_mincount': 650,
4362 'params': {
4363 'skip_download': True,
4364 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4365 },
4366 }, {
4367 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
4368 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4369 'info_dict': {
4370 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4371 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4372 'title': 'Uploads from Royalty Free Music - Topic',
4373 'uploader': 'Royalty Free Music - Topic',
4374 },
4375 'expected_warnings': [
4376 'A channel/user page was given',
4377 'The URL does not have a videos tab',
4378 ],
4379 'playlist_mincount': 101,
4380 'params': {
4381 'skip_download': True,
4382 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4383 },
4384 }]
4385
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle `url`.

    URLs that YoutubeIE accepts are always left to YoutubeIE; everything else
    is decided by the regular `_VALID_URL` based check of the parent class.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
4390
def _real_extract(self, url):
    """Extract `url`, carrying any smuggled data over to the resulting entries."""
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        # Remember the music origin so that the entries can be handled accordingly
        smuggled_data['is_music_url'] = True

    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4399
# Splits a URL into: the part matched by _VALID_URL ("pre"), an optional "/tab" path
# segment that is only looked for when the "channel_type" group matched, and the rest ("post")
_URL_RE = re.compile(
    rf'(?P<pre>{_VALID_URL})'
    r'(?(channel_type)(?P<tab>/\w+))?'
    r'(?P<post>.*)$')
4401
def __real_extract(self, url, smuggled_data):
    """Normalize a tab/playlist/watch URL, fetch its data and dispatch the extraction.

    The host is forced to www.youtube.com, music-specific ids and bare channel
    URLs are rewritten, and the downloaded page data is then handled as a tabbed
    page, as a watch-page playlist, or as a single video delegated to YoutubeIE.

    Raises ExtractorError when a music album cannot be resolved to a playlist,
    when a /live tab did not redirect to a video, or when the page cannot be
    recognized at all.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    channel_redirect = 'no-youtube-channel-redirect' not in compat_opts

    def split_url(target):
        # Groups that did not participate in the match are normalized to ''
        groups = self._URL_RE.match(target).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    mobj = split_url(url)
    redirect_warning = None
    pre = mobj['pre']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    post = mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
        elif id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(
                mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    if is_channel and not tab and channel_redirect:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = f'{pre}{tab}{post}'
    mobj = split_url(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs')
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        # The selected tab is looked up even when the redirect handling below is disabled
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if channel_redirect:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                redirect_warning = f'The URL does not have a {mobj["tab"][1:]} tab'
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, tab_name = pl_id, pl_url, mobj['tab'][1:]
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if tab_name.lower() != mobj['tab'][1:]:
                    redirect_warning += f'. {tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.report_warning(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # The data may have been replaced above, so the tabs are looked up again
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return self.url_result(
        f'https://www.youtube.com/watch?v={video_id}',
        ie=YoutubeIE.ie_key(), video_id=video_id)
4513
4514
class YoutubePlaylistIE(InfoExtractor):
    """Matches bare playlist ids and playlist URLs and hands them over to YoutubeTabIE."""

    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether `url` should be handled here.

        URLs accepted by YoutubeTabIE and URLs carrying a non-empty `v` query
        parameter are rejected; the rest is matched against `_VALID_URL`.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # parse_qs is the module-level import; no function-local import is needed
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite `url` to a canonical playlist URL and delegate it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be checked on the original URL, before it is rewritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string if there is one; a bare id has none
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4600
4601
class YoutubeYtBeIE(InfoExtractor):
    """Handles youtu.be short links that also carry a playlist id."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL and delegate it to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4640
4641
class YoutubeYtUserIE(InfoExtractor):
    """Expands the "ytuser:<name>" shorthand into the user's /videos URL."""

    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the user's videos page to YoutubeTabIE."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4655
4656
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the ":ytfav" family of keywords onto the "LL" playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the keyword itself carries no further information."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
4674
4675
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor behind the "ytsearch" key; only class-level settings are defined here."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = []
4682
4683
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor behind the "ytsearchdate" key; results are ordered by date."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
4689
4690
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles youtube.com/results search URLs, honouring their "sp" parameter."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results as a playlist whose id and title are the query."""
        params = parse_qs(url)
        # "search_query" takes precedence over the shorter "q" alias
        query = (params.get('search_query') or params.get('q'))[0]
        search_params = params.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
4719
4720
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    Every subclass has to provide the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name, derived from the feed name."""
        feed_name = self._FEED_NAME
        return 'youtube:%s' % feed_name

    def _real_extract(self, url):
        """Delegate the feed page for _FEED_NAME to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4737
4738
class YoutubeWatchLaterIE(InfoExtractor):
    """Maps the ":ytwatchlater" keyword onto the "WL" playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the keyword itself carries no further information."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4751
4752
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for "recommended"; matches the bare site URL and the ":ytrec" keywords."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the login requirement set by the feeds base class
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4768
4769
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for "subscriptions"; matches the ":ytsubs" family of keywords."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4781
4782
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for "history"; matches the ":ythis" / ":ythistory" keywords."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4791
4792
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch/attribution URLs that end before any video id.

    Such URLs typically result from an unquoted "&" on the command line, so
    extraction always fails with a hint on how to quote the URL.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The suggested commands name this program (yt-dlp), not its predecessor
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
4840
4841
class YoutubeClipIE(InfoExtractor):
    """Matches youtube.com/clip/ URLs, warns, and passes them on to the Generic extractor."""

    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Warn that clips are unsupported and delegate the unchanged URL to 'Generic'."""
        self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
        return self.url_result(url, ie='Generic')
4850
4851
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose video id is shorter than the usual 11 characters."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)