1# coding: utf-8
2
3from __future__ import unicode_literals
4
5import calendar
6import copy
7import datetime
8import functools
9import hashlib
10import itertools
11import json
12import math
13import os.path
14import random
15import re
16import sys
17import time
18import traceback
19import threading
20
21from .common import InfoExtractor, SearchInfoExtractor
22from ..compat import (
23 compat_chr,
24 compat_HTTPError,
25 compat_parse_qs,
26 compat_str,
27 compat_urllib_parse_unquote_plus,
28 compat_urllib_parse_urlencode,
29 compat_urllib_parse_urlparse,
30 compat_urlparse,
31)
32from ..jsinterp import JSInterpreter
33from ..utils import (
34 bug_reports_message,
35 clean_html,
36 datetime_from_str,
37 dict_get,
38 error_to_compat_str,
39 ExtractorError,
40 float_or_none,
41 format_field,
42 int_or_none,
43 is_html,
44 join_nonempty,
45 mimetype2ext,
46 network_exceptions,
47 NO_DEFAULT,
48 orderedSet,
49 parse_codecs,
50 parse_count,
51 parse_duration,
52 parse_iso8601,
53 parse_qs,
54 qualities,
55 remove_end,
56 remove_start,
57 smuggle_url,
58 str_or_none,
59 str_to_int,
60 strftime_or_none,
61 traverse_obj,
62 try_get,
63 unescapeHTML,
64 unified_strdate,
65 unsmuggle_url,
66 update_url_query,
67 url_or_none,
68 urljoin,
69 variadic,
70)
71
72
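# Usage sketch (hypothetical data): get_first(list_of_player_responses, ('videoDetails', 'title'))
# returns the first matching title across the given dicts, since traverse_obj is called with get_all=False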
73def get_first(obj, keys, **kwargs):
74 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
75
76
77# any clients starting with _ cannot be explicitly requested by the user
78INNERTUBE_CLIENTS = {
79 'web': {
80 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
81 'INNERTUBE_CONTEXT': {
82 'client': {
83 'clientName': 'WEB',
84 'clientVersion': '2.20210622.10.00',
85 }
86 },
87 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
88 },
89 'web_embedded': {
90 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
91 'INNERTUBE_CONTEXT': {
92 'client': {
93 'clientName': 'WEB_EMBEDDED_PLAYER',
94 'clientVersion': '1.20210620.0.1',
95 },
96 },
97 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
98 },
99 'web_music': {
100 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
101 'INNERTUBE_HOST': 'music.youtube.com',
102 'INNERTUBE_CONTEXT': {
103 'client': {
104 'clientName': 'WEB_REMIX',
105 'clientVersion': '1.20210621.00.00',
106 }
107 },
108 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
109 },
110 'web_creator': {
111 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
112 'INNERTUBE_CONTEXT': {
113 'client': {
114 'clientName': 'WEB_CREATOR',
115 'clientVersion': '1.20210621.00.00',
116 }
117 },
118 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
119 },
120 'android': {
121 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
122 'INNERTUBE_CONTEXT': {
123 'client': {
124 'clientName': 'ANDROID',
125 'clientVersion': '16.20',
126 }
127 },
128 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
129 'REQUIRE_JS_PLAYER': False
130 },
131 'android_embedded': {
132 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
133 'INNERTUBE_CONTEXT': {
134 'client': {
135 'clientName': 'ANDROID_EMBEDDED_PLAYER',
136 'clientVersion': '16.20',
137 },
138 },
139 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
140 'REQUIRE_JS_PLAYER': False
141 },
142 'android_music': {
143 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
144 'INNERTUBE_HOST': 'music.youtube.com',
145 'INNERTUBE_CONTEXT': {
146 'client': {
147 'clientName': 'ANDROID_MUSIC',
148 'clientVersion': '4.32',
149 }
150 },
151 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
152 'REQUIRE_JS_PLAYER': False
153 },
154 'android_creator': {
155 'INNERTUBE_CONTEXT': {
156 'client': {
157 'clientName': 'ANDROID_CREATOR',
158 'clientVersion': '21.24.100',
159 },
160 },
161 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
162 'REQUIRE_JS_PLAYER': False
163 },
164 # ios has HLS live streams
165 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
166 'ios': {
167 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
168 'INNERTUBE_CONTEXT': {
169 'client': {
170 'clientName': 'IOS',
171 'clientVersion': '16.20',
172 }
173 },
174 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
175 'REQUIRE_JS_PLAYER': False
176 },
177 'ios_embedded': {
178 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
179 'INNERTUBE_CONTEXT': {
180 'client': {
181 'clientName': 'IOS_MESSAGES_EXTENSION',
182 'clientVersion': '16.20',
183 },
184 },
185 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
186 'REQUIRE_JS_PLAYER': False
187 },
188 'ios_music': {
189 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
190 'INNERTUBE_HOST': 'music.youtube.com',
191 'INNERTUBE_CONTEXT': {
192 'client': {
193 'clientName': 'IOS_MUSIC',
194 'clientVersion': '4.32',
195 },
196 },
197 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
198 'REQUIRE_JS_PLAYER': False
199 },
200 'ios_creator': {
201 'INNERTUBE_CONTEXT': {
202 'client': {
203 'clientName': 'IOS_CREATOR',
204 'clientVersion': '21.24.100',
205 },
206 },
207 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
208 'REQUIRE_JS_PLAYER': False
209 },
210 # mweb has 'ultralow' formats
211 # See: https://github.com/yt-dlp/yt-dlp/pull/557
212 'mweb': {
213 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
214 'INNERTUBE_CONTEXT': {
215 'client': {
216 'clientName': 'MWEB',
217 'clientVersion': '2.20210721.07.00',
218 }
219 },
220 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
221 },
222}
223
224
225def build_innertube_clients():
226 third_party = {
227 'embedUrl': 'https://google.com', # Can be any valid URL
228 }
229 base_clients = ('android', 'web', 'ios', 'mweb')
230 priority = qualities(base_clients[::-1])
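    # Note: reversing base_clients means qualities() ranks mweb < ios < web < android,
    # so after the *10 multiplier below the android-based clients get the highest priority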
231
232 for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
233 ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
234 ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
235 ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
236 ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
237 ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
238
239 if client in base_clients:
240 INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
241 agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
242 agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
243 agegate_ytcfg['priority'] -= 1
244 elif client.endswith('_embedded'):
245 ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
246 ytcfg['priority'] -= 2
247 else:
248 ytcfg['priority'] -= 3
249
250
251build_innertube_clients()
252
253
254class YoutubeBaseInfoExtractor(InfoExtractor):
255 """Provide base functions for Youtube extractors"""
256
257 _RESERVED_NAMES = (
258 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
259 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
260 r'browse|oembed|get_video_info|iframe_api|s/player|'
261 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
262
263 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
264
265 _NETRC_MACHINE = 'youtube'
266
267 # If True, an error is raised when no login info is provided
268 _LOGIN_REQUIRED = False
269
270 _INVIDIOUS_SITES = (
271 # invidious-redirect websites
272 r'(?:www\.)?redirect\.invidious\.io',
273 r'(?:(?:www|dev)\.)?invidio\.us',
274 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
275 r'(?:www\.)?invidious\.pussthecat\.org',
276 r'(?:www\.)?invidious\.zee\.li',
277 r'(?:www\.)?invidious\.ethibox\.fr',
278 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
279 # youtube-dl invidious instances list
280 r'(?:(?:www|no)\.)?invidiou\.sh',
281 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
282 r'(?:www\.)?invidious\.kabi\.tk',
283 r'(?:www\.)?invidious\.mastodon\.host',
284 r'(?:www\.)?invidious\.zapashcanon\.fr',
285 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
286 r'(?:www\.)?invidious\.tinfoil-hat\.net',
287 r'(?:www\.)?invidious\.himiko\.cloud',
288 r'(?:www\.)?invidious\.reallyancient\.tech',
289 r'(?:www\.)?invidious\.tube',
290 r'(?:www\.)?invidiou\.site',
291 r'(?:www\.)?invidious\.site',
292 r'(?:www\.)?invidious\.xyz',
293 r'(?:www\.)?invidious\.nixnet\.xyz',
294 r'(?:www\.)?invidious\.048596\.xyz',
295 r'(?:www\.)?invidious\.drycat\.fr',
296 r'(?:www\.)?inv\.skyn3t\.in',
297 r'(?:www\.)?tube\.poal\.co',
298 r'(?:www\.)?tube\.connect\.cafe',
299 r'(?:www\.)?vid\.wxzm\.sx',
300 r'(?:www\.)?vid\.mint\.lgbt',
301 r'(?:www\.)?vid\.puffyan\.us',
302 r'(?:www\.)?yewtu\.be',
303 r'(?:www\.)?yt\.elukerio\.org',
304 r'(?:www\.)?yt\.lelux\.fi',
305 r'(?:www\.)?invidious\.ggc-project\.de',
306 r'(?:www\.)?yt\.maisputain\.ovh',
307 r'(?:www\.)?ytprivate\.com',
308 r'(?:www\.)?invidious\.13ad\.de',
309 r'(?:www\.)?invidious\.toot\.koeln',
310 r'(?:www\.)?invidious\.fdn\.fr',
311 r'(?:www\.)?watch\.nettohikari\.com',
312 r'(?:www\.)?invidious\.namazso\.eu',
313 r'(?:www\.)?invidious\.silkky\.cloud',
314 r'(?:www\.)?invidious\.exonip\.de',
315 r'(?:www\.)?invidious\.riverside\.rocks',
316 r'(?:www\.)?invidious\.blamefran\.net',
317 r'(?:www\.)?invidious\.moomoo\.de',
318 r'(?:www\.)?ytb\.trom\.tf',
319 r'(?:www\.)?yt\.cyberhost\.uk',
320 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
321 r'(?:www\.)?qklhadlycap4cnod\.onion',
322 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
323 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
324 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
325 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
326 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
327 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
328 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
329 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
330 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
331 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
332 )
333
334 def _login(self):
335 """
336 Attempt to log in to YouTube.
337 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
338 """
339
340 if (self._LOGIN_REQUIRED
341 and self.get_param('cookiefile') is None
342 and self.get_param('cookiesfrombrowser') is None):
343 self.raise_login_required(
344 'Login details are needed to download this content', method='cookies')
345 username, password = self._get_login_info()
346 if username:
347 self.report_warning(f'Cannot log in to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
348
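    # Best-effort note: setting a CONSENT=YES+... cookie appears to pre-accept the EU cookie-consent
    # interstitial so that youtube.com serves pages directly instead of a consent redirect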
349 def _initialize_consent(self):
350 cookies = self._get_cookies('https://www.youtube.com/')
351 if cookies.get('__Secure-3PSID'):
352 return
353 consent_id = None
354 consent = cookies.get('CONSENT')
355 if consent:
356 if 'YES' in consent.value:
357 return
358 consent_id = self._search_regex(
359 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
360 if not consent_id:
361 consent_id = random.randint(100, 999)
362 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
363
364 def _initialize_pref(self):
365 cookies = self._get_cookies('https://www.youtube.com/')
366 pref_cookie = cookies.get('PREF')
367 pref = {}
368 if pref_cookie:
369 try:
370 pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
371 except ValueError:
372 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
373 pref.update({'hl': 'en'})
374 self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
375
376 def _real_initialize(self):
377 self._initialize_pref()
378 self._initialize_consent()
379 self._login()
380
381 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
382 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
383 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
384
385 def _get_default_ytcfg(self, client='web'):
386 return copy.deepcopy(INNERTUBE_CLIENTS[client])
387
388 def _get_innertube_host(self, client='web'):
389 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
390
391 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
392 # try_get but with fallback to default ytcfg client values when present
393 _func = lambda y: try_get(y, getter, expected_type)
394 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
395
396 def _extract_client_name(self, ytcfg, default_client='web'):
397 return self._ytcfg_get_safe(
398 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
399 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
400
401 def _extract_client_version(self, ytcfg, default_client='web'):
402 return self._ytcfg_get_safe(
403 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
404 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
405
406 def _extract_api_key(self, ytcfg=None, default_client='web'):
407 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
408
409 def _extract_context(self, ytcfg=None, default_client='web'):
410 context = get_first(
411 (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
412 # Enforce language for extraction
413 traverse_obj(context, 'client', expected_type=dict, default={})['hl'] = 'en'
414 return context
415
416 _SAPISID = None
417
418 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
419 time_now = round(time.time())
420 if self._SAPISID is None:
421 yt_cookies = self._get_cookies('https://www.youtube.com')
422 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
423 # See: https://github.com/yt-dlp/yt-dlp/issues/393
424 sapisid_cookie = dict_get(
425 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
426 if sapisid_cookie and sapisid_cookie.value:
427 self._SAPISID = sapisid_cookie.value
428 self.write_debug('Extracted SAPISID cookie')
429 # The SAPISID cookie is required, so set it if not already present
430 if not yt_cookies.get('SAPISID'):
431 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
432 self._set_cookie(
433 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
434 else:
435 self._SAPISID = False
436 if not self._SAPISID:
437 return None
438 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
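        # Illustrative result (hypothetical values): for SAPISID 'abc' at time 1634000000 the header is
        # 'SAPISIDHASH 1634000000_' + sha1('1634000000 abc https://www.youtube.com').hexdigest()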
439 sapisidhash = hashlib.sha1(
440 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
441 return f'SAPISIDHASH {time_now}_{sapisidhash}'
442
443 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
444 note='Downloading API JSON', errnote='Unable to download API page',
445 context=None, api_key=None, api_hostname=None, default_client='web'):
446
447 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
448 data.update(query)
449 real_headers = self.generate_api_headers(default_client=default_client)
450 real_headers.update({'content-type': 'application/json'})
451 if headers:
452 real_headers.update(headers)
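        # Request shape (sketch, values vary per client): POST https://<host>/youtubei/v1/<ep>?key=<api_key>
        # with a JSON body of {'context': {...}, **query}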
453 return self._download_json(
454 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
455 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
456 data=json.dumps(data).encode('utf8'), headers=real_headers,
457 query={'key': api_key or self._extract_api_key()})
458
459 def extract_yt_initial_data(self, item_id, webpage, fatal=True):
460 data = self._search_regex(
461 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
462 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
463 if data:
464 return self._parse_json(data, item_id, fatal=fatal)
465
466 @staticmethod
467 def _extract_session_index(*data):
468 """
469 Index of current account in account list.
470 See: https://github.com/yt-dlp/yt-dlp/pull/519
471 """
472 for ytcfg in data:
473 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
474 if session_index is not None:
475 return session_index
476
477 # Deprecated?
478 def _extract_identity_token(self, ytcfg=None, webpage=None):
479 if ytcfg:
480 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
481 if token:
482 return token
483 if webpage:
484 return self._search_regex(
485 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
486 'identity token', default=None, fatal=False)
487
488 @staticmethod
489 def _extract_account_syncid(*args):
490 """
491 Extract syncId required to download private playlists of secondary channels
492 @params response and/or ytcfg
493 """
494 for data in args:
495 # ytcfg includes channel_syncid if on secondary channel
496 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
497 if delegated_sid:
498 return delegated_sid
499 sync_ids = (try_get(
500 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
501 lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
502 if len(sync_ids) >= 2 and sync_ids[1]:
503 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
504 # and just "user_syncid||" for primary channel. We only want the channel_syncid
505 return sync_ids[0]
506
507 @staticmethod
508 def _extract_visitor_data(*args):
509 """
510 Extracts visitorData from an API response or ytcfg
511 Appears to be used to track session state
512 """
513 return get_first(
514 args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
515 expected_type=str)
516
517 @property
518 def is_authenticated(self):
519 return bool(self._generate_sapisidhash_header())
520
521 def extract_ytcfg(self, video_id, webpage):
522 if not webpage:
523 return {}
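        # e.g. a watch page containing `ytcfg.set({"INNERTUBE_API_KEY": "...", ...});` yields that dict;
        # anything else falls back to {}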
524 return self._parse_json(
525 self._search_regex(
526 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
527 default='{}'), video_id, fatal=False) or {}
528
529 def generate_api_headers(
530 self, *, ytcfg=None, account_syncid=None, session_index=None,
531 visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
532
533 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
534 headers = {
535 'X-YouTube-Client-Name': compat_str(
536 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
537 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
538 'Origin': origin,
539 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
540 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
541 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
542 }
543 if session_index is None:
544 session_index = self._extract_session_index(ytcfg)
545 if account_syncid or session_index is not None:
546 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
547
548 auth = self._generate_sapisidhash_header(origin)
549 if auth is not None:
550 headers['Authorization'] = auth
551 headers['X-Origin'] = origin
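        # Typical web-client result (sketch): X-YouTube-Client-Name: 1, X-YouTube-Client-Version: 2.20210622.10.00,
        # Origin/X-Origin: https://www.youtube.com and Authorization: SAPISIDHASH ... when cookies provide a SAPISID;
        # None values are dropped below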
552 return {h: v for h, v in headers.items() if v is not None}
553
554 @staticmethod
555 def _build_api_continuation_query(continuation, ctp=None):
556 query = {
557 'continuation': continuation
558 }
559 # TODO: Inconsistency with clickTrackingParams.
560 # Currently we have a fixed ctp contained within context (from ytcfg)
561 # and a ctp in root query for continuation.
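        # Resulting query sketch: {'continuation': <token>} plus, when a ctp is given,
        # {'clickTracking': {'clickTrackingParams': <ctp>}}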
562 if ctp:
563 query['clickTracking'] = {'clickTrackingParams': ctp}
564 return query
565
566 @classmethod
567 def _extract_next_continuation_data(cls, renderer):
568 next_continuation = try_get(
569 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
570 lambda x: x['continuation']['reloadContinuationData']), dict)
571 if not next_continuation:
572 return
573 continuation = next_continuation.get('continuation')
574 if not continuation:
575 return
576 ctp = next_continuation.get('clickTrackingParams')
577 return cls._build_api_continuation_query(continuation, ctp)
578
579 @classmethod
580 def _extract_continuation_ep_data(cls, continuation_ep: dict):
581 if isinstance(continuation_ep, dict):
582 continuation = try_get(
583 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
584 if not continuation:
585 return
586 ctp = continuation_ep.get('clickTrackingParams')
587 return cls._build_api_continuation_query(continuation, ctp)
588
589 @classmethod
590 def _extract_continuation(cls, renderer):
591 next_continuation = cls._extract_next_continuation_data(renderer)
592 if next_continuation:
593 return next_continuation
594
595 contents = []
596 for key in ('contents', 'items'):
597 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
598
599 for content in contents:
600 if not isinstance(content, dict):
601 continue
602 continuation_ep = try_get(
603 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
604 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
605 dict)
606 continuation = cls._extract_continuation_ep_data(continuation_ep)
607 if continuation:
608 return continuation
609
610 @classmethod
611 def _extract_alerts(cls, data):
612 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
613 if not isinstance(alert_dict, dict):
614 continue
615 for alert in alert_dict.values():
616 alert_type = alert.get('type')
617 if not alert_type:
618 continue
619 message = cls._get_text(alert, 'text')
620 if message:
621 yield alert_type, message
622
623 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
624 errors = []
625 warnings = []
626 for alert_type, alert_message in alerts:
627 if alert_type.lower() == 'error' and fatal:
628 errors.append([alert_type, alert_message])
629 else:
630 warnings.append([alert_type, alert_message])
631
632 for alert_type, alert_message in (warnings + errors[:-1]):
633 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
634 if errors:
635 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
636
637 def _extract_and_report_alerts(self, data, *args, **kwargs):
638 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
639
640 def _extract_badges(self, renderer: dict):
641 badges = set()
642 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
643 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
644 if label:
645 badges.add(label.lower())
646 return badges
647
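    # e.g. (illustrative values) both {'title': {'simpleText': 'foo'}} and
    # {'title': {'runs': [{'text': 'f'}, {'text': 'oo'}]}} resolve to 'foo' via _get_text(data, 'title')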
648 @staticmethod
649 def _get_text(data, *path_list, max_runs=None):
650 for path in path_list or [None]:
651 if path is None:
652 obj = [data]
653 else:
654 obj = traverse_obj(data, path, default=[])
655 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
656 obj = [obj]
657 for item in obj:
658 text = try_get(item, lambda x: x['simpleText'], compat_str)
659 if text:
660 return text
661 runs = try_get(item, lambda x: x['runs'], list) or []
662 if not runs and isinstance(item, list):
663 runs = item
664
665 runs = runs[:min(len(runs), max_runs or len(runs))]
666 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
667 if text:
668 return text
669
670 @staticmethod
671 def _extract_thumbnails(data, *path_list):
672 """
673 Extract thumbnails from thumbnails dict
674 @param path_list: path list to level that contains 'thumbnails' key
675 """
676 thumbnails = []
677 for path in path_list or [()]:
678 for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
679 thumbnail_url = url_or_none(thumbnail.get('url'))
680 if not thumbnail_url:
681 continue
682 # Sometimes YouTube gives a wrong thumbnail URL. See:
683 # https://github.com/yt-dlp/yt-dlp/issues/233
684 # https://github.com/ytdl-org/youtube-dl/issues/28023
685 if 'maxresdefault' in thumbnail_url:
686 thumbnail_url = thumbnail_url.split('?')[0]
687 thumbnails.append({
688 'url': thumbnail_url,
689 'height': int_or_none(thumbnail.get('height')),
690 'width': int_or_none(thumbnail.get('width')),
691 })
692 return thumbnails
693
694 @staticmethod
695 def extract_relative_time(relative_time_text):
696 """
697 Extracts a relative time from a string and converts it to a datetime object
698 e.g. 'streamed 6 days ago', '5 seconds ago (edited)'
699 """
700 mobj = re.search(r'(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
701 if mobj:
702 try:
703 return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')), precision='auto')
704 except ValueError:
705 return None
706
707 def _extract_time_text(self, renderer, *path_list):
708 text = self._get_text(renderer, *path_list) or ''
709 dt = self.extract_relative_time(text)
710 timestamp = None
711 if isinstance(dt, datetime.datetime):
712 timestamp = calendar.timegm(dt.timetuple())
713 if text and timestamp is None:
714 self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
715 return timestamp, text
716
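    # Retry sketch: one attempt plus up to 'extractor_retries' (default 3) retries; HTTP 403/429 errors
    # are not retried, while other network errors, 'unknown error' alerts and responses missing
    # check_get_keys ('Incomplete data received') are retried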
717 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
718 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
719 default_client='web'):
720 response = None
721 last_error = None
722 count = -1
723 retries = self.get_param('extractor_retries', 3)
724 if check_get_keys is None:
725 check_get_keys = []
726 while count < retries:
727 count += 1
728 if last_error:
729 self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
730 try:
731 response = self._call_api(
732 ep=ep, fatal=True, headers=headers,
733 video_id=item_id, query=query,
734 context=self._extract_context(ytcfg, default_client),
735 api_key=self._extract_api_key(ytcfg, default_client),
736 api_hostname=api_hostname, default_client=default_client,
737 note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
738 except ExtractorError as e:
739 if isinstance(e.cause, network_exceptions):
740 if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
741 e.cause.seek(0)
742 yt_error = try_get(
743 self._parse_json(e.cause.read().decode(), item_id, fatal=False),
744 lambda x: x['error']['message'], compat_str)
745 if yt_error:
746 self._report_alerts([('ERROR', yt_error)], fatal=False)
747 # Downloading the page may result in intermittent 5xx HTTP errors
748 # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
749 # We also want to catch all other network exceptions since errors in later pages can be troublesome
750 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
751 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
752 last_error = error_to_compat_str(e.cause or e.msg)
753 if count < retries:
754 continue
755 if fatal:
756 raise
757 else:
758 self.report_warning(error_to_compat_str(e))
759 return
760
761 else:
762 try:
763 self._extract_and_report_alerts(response, only_once=True)
764 except ExtractorError as e:
765 # YouTube servers may return errors we want to retry on in a 200 OK response
766 # See: https://github.com/yt-dlp/yt-dlp/issues/839
767 if 'unknown error' in e.msg.lower():
768 last_error = e.msg
769 continue
770 if fatal:
771 raise
772 self.report_warning(error_to_compat_str(e))
773 return
774 if not check_get_keys or dict_get(response, check_get_keys):
775 break
776 # YouTube sometimes sends incomplete data
777 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
778 last_error = 'Incomplete data received'
779 if count >= retries:
780 if fatal:
781 raise ExtractorError(last_error)
782 else:
783 self.report_warning(last_error)
784 return
785 return response
786
787 @staticmethod
788 def is_music_url(url):
789 return re.match(r'https?://music\.youtube\.com/', url) is not None
790
791 def _extract_video(self, renderer):
792 video_id = renderer.get('videoId')
793 title = self._get_text(renderer, 'title')
794 description = self._get_text(renderer, 'descriptionSnippet')
795 duration = parse_duration(self._get_text(
796 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
797 view_count_text = self._get_text(renderer, 'viewCountText') or ''
798 view_count = str_to_int(self._search_regex(
799 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
800 'view count', default=None))
801
802 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
803 channel_id = traverse_obj(
804 renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False)
805 timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
806 scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
807 overlay_style = traverse_obj(
808 renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
809 badges = self._extract_badges(renderer)
810 thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
811
812 return {
813 '_type': 'url',
814 'ie_key': YoutubeIE.ie_key(),
815 'id': video_id,
816 'url': f'https://www.youtube.com/watch?v={video_id}',
817 'title': title,
818 'description': description,
819 'duration': duration,
820 'view_count': view_count,
821 'uploader': uploader,
822 'channel_id': channel_id,
823 'thumbnails': thumbnails,
824 'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
825 'live_status': ('is_upcoming' if scheduled_timestamp is not None
826 else 'was_live' if 'streamed' in time_text.lower()
827 else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges
828 else None),
829 'release_timestamp': scheduled_timestamp,
830 'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
831 }
832
833
834class YoutubeIE(YoutubeBaseInfoExtractor):
835 IE_DESC = 'YouTube'
836 _VALID_URL = r"""(?x)^
837 (
838 (?:https?://|//) # http(s):// or protocol-independent URL
839 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
840 (?:www\.)?deturl\.com/www\.youtube\.com|
841 (?:www\.)?pwnyoutube\.com|
842 (?:www\.)?hooktube\.com|
843 (?:www\.)?yourepeat\.com|
844 tube\.majestyc\.net|
845 %(invidious)s|
846 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
847 (?:.*?\#/)? # handle anchor (#/) redirect urls
848 (?: # the various things that can precede the ID:
849 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
850 |(?: # or the v= param in all its forms
851 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
852 (?:\?|\#!?) # the params delimiter ? or # or #!
853 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
854 v=
855 )
856 ))
857 |(?:
858 youtu\.be| # just youtu.be/xxxx
859 vid\.plus| # or vid.plus/xxxx
860 zwearz\.com/watch| # or zwearz.com/watch/xxxx
861 %(invidious)s
862 )/
863 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
864 )
865 )? # all until now is optional -> you can pass the naked ID
866 (?P<id>[0-9A-Za-z_-]{11}) # here it is! the YouTube video ID
867 (?(1).+)? # if we found the ID, everything can follow
868 (?:\#|$)""" % {
869 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
870 }
871 _PLAYER_INFO_RE = (
872 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
873 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
874 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
875 )
876 _formats = {
877 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
878 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
879 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
880 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
881 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
882 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
883 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
884 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
885 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
886 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
887 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
888 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
889 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
890 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
891 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
892 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
893 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
894 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
895
896
897 # 3D videos
898 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
899 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
900 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
901 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
902 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
903 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
904 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
905
906 # Apple HTTP Live Streaming
907 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
908 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
909 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
910 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
911 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
912 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
913 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
914 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
915
916 # DASH mp4 video
917 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
918 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
919 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
920 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
921 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
922 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
923 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
924 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
925 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
926 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
927 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
928 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
929
930 # Dash mp4 audio
931 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
932 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
933 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
934 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
935 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
936 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
937 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
938
939 # Dash webm
940 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
941 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
942 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
943 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
944 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
945 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
946 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
947 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
948 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
949 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
950 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
951 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
952 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
953 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
954 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
955 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
956 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
957 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
958 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
959 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
960 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
961 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
962
963 # Dash webm audio
964 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
965 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
966
967 # Dash webm audio with opus inside
968 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
969 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
970 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
971
972 # RTMP (unnamed)
973 '_rtmp': {'protocol': 'rtmp'},
974
975 # av01 video only formats sometimes served with "unknown" codecs
976 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
977 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
978 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
979 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
980 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
981 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
982 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
983 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
984 }
985 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
986
987 _GEO_BYPASS = False
988
989 IE_NAME = 'youtube'
990 _TESTS = [
991 {
992 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
993 'info_dict': {
994 'id': 'BaW_jenozKc',
995 'ext': 'mp4',
996 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
997 'uploader': 'Philipp Hagemeister',
998 'uploader_id': 'phihag',
999 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1000 'channel': 'Philipp Hagemeister',
1001 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1002 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1003 'upload_date': '20121002',
1004 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1005 'categories': ['Science & Technology'],
1006 'tags': ['youtube-dl'],
1007 'duration': 10,
1008 'view_count': int,
1009 'like_count': int,
1010 # 'dislike_count': int,
1011 'availability': 'public',
1012 'playable_in_embed': True,
1013 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1014 'live_status': 'not_live',
1015 'age_limit': 0,
1016 'start_time': 1,
1017 'end_time': 9,
1018 }
1019 },
1020 {
1021 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1022 'note': 'Embed-only video (#1746)',
1023 'info_dict': {
1024 'id': 'yZIXLfi8CZQ',
1025 'ext': 'mp4',
1026 'upload_date': '20120608',
1027 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1028 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1029 'uploader': 'SET India',
1030 'uploader_id': 'setindia',
1031 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1032 'age_limit': 18,
1033 },
1034 'skip': 'Private video',
1035 },
1036 {
1037 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1038 'note': 'Use the first video ID in the URL',
1039 'info_dict': {
1040 'id': 'BaW_jenozKc',
1041 'ext': 'mp4',
1042 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1043 'uploader': 'Philipp Hagemeister',
1044 'uploader_id': 'phihag',
1045 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1046 'upload_date': '20121002',
1047 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1048 'categories': ['Science & Technology'],
1049 'tags': ['youtube-dl'],
1050 'duration': 10,
1051 'view_count': int,
1052 'like_count': int,
1053 'dislike_count': int,
1054 },
1055 'params': {
1056 'skip_download': True,
1057 },
1058 },
1059 {
1060 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1061 'note': '256k DASH audio (format 141) via DASH manifest',
1062 'info_dict': {
1063 'id': 'a9LDPn-MO4I',
1064 'ext': 'm4a',
1065 'upload_date': '20121002',
1066 'uploader_id': '8KVIDEO',
1067 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1068 'description': '',
1069 'uploader': '8KVIDEO',
1070 'title': 'UHDTV TEST 8K VIDEO.mp4'
1071 },
1072 'params': {
1073 'youtube_include_dash_manifest': True,
1074 'format': '141',
1075 },
1076 'skip': 'format 141 not served anymore',
1077 },
1078 # DASH manifest with encrypted signature
1079 {
1080 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1081 'info_dict': {
1082 'id': 'IB3lcPjvWLA',
1083 'ext': 'm4a',
1084 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1085 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1086 'duration': 244,
1087 'uploader': 'AfrojackVEVO',
1088 'uploader_id': 'AfrojackVEVO',
1089 'upload_date': '20131011',
1090 'abr': 129.495,
1091 },
1092 'params': {
1093 'youtube_include_dash_manifest': True,
1094 'format': '141/bestaudio[ext=m4a]',
1095 },
1096 },
1097 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1098 {
1099 'note': 'Embed allowed age-gate video',
1100 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1101 'info_dict': {
1102 'id': 'HtVdAasjOgU',
1103 'ext': 'mp4',
1104 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1105 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1106 'duration': 142,
1107 'uploader': 'The Witcher',
1108 'uploader_id': 'WitcherGame',
1109 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1110 'upload_date': '20140605',
1111 'age_limit': 18,
1112 },
1113 },
1114 {
1115 'note': 'Age-gate video with embed allowed in public site',
1116 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1117 'info_dict': {
1118 'id': 'HsUATh_Nc2U',
1119 'ext': 'mp4',
1120 'title': 'Godzilla 2 (Official Video)',
1121 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1122 'upload_date': '20200408',
1123 'uploader_id': 'FlyingKitty900',
1124 'uploader': 'FlyingKitty',
1125 'age_limit': 18,
1126 },
1127 },
1128 {
1129 'note': 'Age-gate video embeddable only with clientScreen=EMBED',
1130 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1131 'info_dict': {
1132 'id': 'Tq92D6wQ1mg',
1133 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1134 'ext': 'mp4',
1135 'upload_date': '20191227',
1136 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1137 'uploader': 'Projekt Melody',
1138 'description': 'md5:17eccca93a786d51bc67646756894066',
1139 'age_limit': 18,
1140 },
1141 },
1142 {
1143 'note': 'Non-age-gated non-embeddable video',
1144 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1145 'info_dict': {
1146 'id': 'MeJVWBSsPAY',
1147 'ext': 'mp4',
1148 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1149 'uploader': 'Herr Lurik',
1150 'uploader_id': 'st3in234',
1151 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1152 'upload_date': '20130730',
1153 },
1154 },
1155 {
1156 'note': 'Non-bypassable age-gated video',
1157 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1158 'only_matching': True,
1159 },
1160 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1161 # YouTube Red ad is not captured for creator
1162 {
1163 'url': '__2ABJjxzNo',
1164 'info_dict': {
1165 'id': '__2ABJjxzNo',
1166 'ext': 'mp4',
1167 'duration': 266,
1168 'upload_date': '20100430',
1169 'uploader_id': 'deadmau5',
1170 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1171 'creator': 'deadmau5',
1172 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1173 'uploader': 'deadmau5',
1174 'title': 'Deadmau5 - Some Chords (HD)',
1175 'alt_title': 'Some Chords',
1176 },
1177 'expected_warnings': [
1178 'DASH manifest missing',
1179 ]
1180 },
1181 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1182 {
1183 'url': 'lqQg6PlCWgI',
1184 'info_dict': {
1185 'id': 'lqQg6PlCWgI',
1186 'ext': 'mp4',
1187 'duration': 6085,
1188 'upload_date': '20150827',
1189 'uploader_id': 'olympic',
1190 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1191 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1192 'uploader': 'Olympics',
1193 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1194 },
1195 'params': {
1196 'skip_download': 'requires avconv',
1197 }
1198 },
1199 # Non-square pixels
1200 {
1201 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1202 'info_dict': {
1203 'id': '_b-2C3KPAM0',
1204 'ext': 'mp4',
1205 'stretched_ratio': 16 / 9.,
1206 'duration': 85,
1207 'upload_date': '20110310',
1208 'uploader_id': 'AllenMeow',
1209 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1210 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1211 'uploader': '孫ᄋᄅ',
1212 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1213 },
1214 },
1215 # url_encoded_fmt_stream_map is empty string
1216 {
1217 'url': 'qEJwOuvDf7I',
1218 'info_dict': {
1219 'id': 'qEJwOuvDf7I',
1220 'ext': 'webm',
1221 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1222 'description': '',
1223 'upload_date': '20150404',
1224 'uploader_id': 'spbelect',
1225 'uploader': 'Наблюдатели Петербурга',
1226 },
1227 'params': {
1228 'skip_download': 'requires avconv',
1229 },
1230 'skip': 'This live event has ended.',
1231 },
1232 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1233 {
1234 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1235 'info_dict': {
1236 'id': 'FIl7x6_3R5Y',
1237 'ext': 'webm',
1238 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1239 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1240 'duration': 220,
1241 'upload_date': '20150625',
1242 'uploader_id': 'dorappi2000',
1243 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1244 'uploader': 'dorappi2000',
1245 'formats': 'mincount:31',
1246 },
1247 'skip': 'not actual anymore',
1248 },
1249 # DASH manifest with segment_list
1250 {
1251 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1252 'md5': '8ce563a1d667b599d21064e982ab9e31',
1253 'info_dict': {
1254 'id': 'CsmdDsKjzN8',
1255 'ext': 'mp4',
1256 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1257 'uploader': 'Airtek',
1258 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1259 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1260 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1261 },
1262 'params': {
1263 'youtube_include_dash_manifest': True,
1264 'format': '135', # bestvideo
1265 },
1266 'skip': 'This live event has ended.',
1267 },
1268 {
1269 # Multifeed videos (multiple cameras), URL is for Main Camera
1270 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1271 'info_dict': {
1272 'id': 'jvGDaLqkpTg',
1273 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1274 'description': 'md5:e03b909557865076822aa169218d6a5d',
1275 },
1276 'playlist': [{
1277 'info_dict': {
1278 'id': 'jvGDaLqkpTg',
1279 'ext': 'mp4',
1280 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1281 'description': 'md5:e03b909557865076822aa169218d6a5d',
1282 'duration': 10643,
1283 'upload_date': '20161111',
1284 'uploader': 'Team PGP',
1285 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1286 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1287 },
1288 }, {
1289 'info_dict': {
1290 'id': '3AKt1R1aDnw',
1291 'ext': 'mp4',
1292 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1293 'description': 'md5:e03b909557865076822aa169218d6a5d',
1294 'duration': 10991,
1295 'upload_date': '20161111',
1296 'uploader': 'Team PGP',
1297 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1299 },
1300 }, {
1301 'info_dict': {
1302 'id': 'RtAMM00gpVc',
1303 'ext': 'mp4',
1304 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1305 'description': 'md5:e03b909557865076822aa169218d6a5d',
1306 'duration': 10995,
1307 'upload_date': '20161111',
1308 'uploader': 'Team PGP',
1309 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1310 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1311 },
1312 }, {
1313 'info_dict': {
1314 'id': '6N2fdlP3C5U',
1315 'ext': 'mp4',
1316 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1317 'description': 'md5:e03b909557865076822aa169218d6a5d',
1318 'duration': 10990,
1319 'upload_date': '20161111',
1320 'uploader': 'Team PGP',
1321 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1322 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1323 },
1324 }],
1325 'params': {
1326 'skip_download': True,
1327 },
1328 'skip': 'Not multifeed anymore',
1329 },
1330 {
1331 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1332 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1333 'info_dict': {
1334 'id': 'gVfLd0zydlo',
1335 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1336 },
1337 'playlist_count': 2,
1338 'skip': 'Not multifeed anymore',
1339 },
1340 {
1341 'url': 'https://vid.plus/FlRa-iH7PGw',
1342 'only_matching': True,
1343 },
1344 {
1345 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1346 'only_matching': True,
1347 },
1348 {
1349 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1350 # Also tests cut-off URL expansion in video description (see
1351 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1352 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1353 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1354 'info_dict': {
1355 'id': 'lsguqyKfVQg',
1356 'ext': 'mp4',
1357 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1358 'alt_title': 'Dark Walk',
1359 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1360 'duration': 133,
1361 'upload_date': '20151119',
1362 'uploader_id': 'IronSoulElf',
1363 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1364 'uploader': 'IronSoulElf',
1365 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1366 'track': 'Dark Walk',
1367 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1368 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1369 },
1370 'params': {
1371 'skip_download': True,
1372 },
1373 },
1374 {
1375 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1376 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1377 'only_matching': True,
1378 },
1379 {
1380 # Video with yt:stretch=17:0
1381 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1382 'info_dict': {
1383 'id': 'Q39EVAstoRM',
1384 'ext': 'mp4',
1385 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1386 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1387 'upload_date': '20151107',
1388 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1389 'uploader': 'CH GAMER DROID',
1390 },
1391 'params': {
1392 'skip_download': True,
1393 },
1394 'skip': 'This video does not exist.',
1395 },
1396 {
1397 # Video with incomplete 'yt:stretch=16:'
1398 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1399 'only_matching': True,
1400 },
1401 {
1402 # Video licensed under Creative Commons
1403 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1404 'info_dict': {
1405 'id': 'M4gD1WSo5mA',
1406 'ext': 'mp4',
1407 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1408 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1409 'duration': 721,
1410 'upload_date': '20150127',
1411 'uploader_id': 'BerkmanCenter',
1412 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1413 'uploader': 'The Berkman Klein Center for Internet & Society',
1414 'license': 'Creative Commons Attribution license (reuse allowed)',
1415 },
1416 'params': {
1417 'skip_download': True,
1418 },
1419 },
1420 {
1421 # Channel-like uploader_url
1422 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1423 'info_dict': {
1424 'id': 'eQcmzGIKrzg',
1425 'ext': 'mp4',
1426 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1427 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1428 'duration': 4060,
1429 'upload_date': '20151119',
1430 'uploader': 'Bernie Sanders',
1431 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1433 'license': 'Creative Commons Attribution license (reuse allowed)',
1434 },
1435 'params': {
1436 'skip_download': True,
1437 },
1438 },
1439 {
1440 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1441 'only_matching': True,
1442 },
1443 {
1444 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1445 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1446 'only_matching': True,
1447 },
1448 {
1449 # Rental video preview
1450 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1451 'info_dict': {
1452 'id': 'uGpuVWrhIzE',
1453 'ext': 'mp4',
1454 'title': 'Piku - Trailer',
1455 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1456 'upload_date': '20150811',
1457 'uploader': 'FlixMatrix',
1458 'uploader_id': 'FlixMatrixKaravan',
1459 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1460 'license': 'Standard YouTube License',
1461 },
1462 'params': {
1463 'skip_download': True,
1464 },
1465 'skip': 'This video is not available.',
1466 },
1467 {
1468 # YouTube Red video with episode data
1469 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1470 'info_dict': {
1471 'id': 'iqKdEhx-dD4',
1472 'ext': 'mp4',
1473 'title': 'Isolation - Mind Field (Ep 1)',
1474 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1475 'duration': 2085,
1476 'upload_date': '20170118',
1477 'uploader': 'Vsauce',
1478 'uploader_id': 'Vsauce',
1479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1480 'series': 'Mind Field',
1481 'season_number': 1,
1482 'episode_number': 1,
1483 },
1484 'params': {
1485 'skip_download': True,
1486 },
1487 'expected_warnings': [
1488 'Skipping DASH manifest',
1489 ],
1490 },
1491 {
1492 # The following content has been identified by the YouTube community
1493 # as inappropriate or offensive to some audiences.
1494 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1495 'info_dict': {
1496 'id': '6SJNVb0GnPI',
1497 'ext': 'mp4',
1498 'title': 'Race Differences in Intelligence',
1499 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1500 'duration': 965,
1501 'upload_date': '20140124',
1502 'uploader': 'New Century Foundation',
1503 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1504 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1505 },
1506 'params': {
1507 'skip_download': True,
1508 },
1509 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1510 },
1511 {
1512 # itag 212
1513 'url': '1t24XAntNCY',
1514 'only_matching': True,
1515 },
1516 {
1517 # geo restricted to JP
1518 'url': 'sJL6WA-aGkQ',
1519 'only_matching': True,
1520 },
1521 {
1522 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1523 'only_matching': True,
1524 },
1525 {
1526 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1527 'only_matching': True,
1528 },
1529 {
1530 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1531 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1532 'only_matching': True,
1533 },
1534 {
1535 # DRM protected
1536 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1537 'only_matching': True,
1538 },
1539 {
1540 # Video with unsupported adaptive stream type formats
1541 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1542 'info_dict': {
1543 'id': 'Z4Vy8R84T1U',
1544 'ext': 'mp4',
1545 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1546 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1547 'duration': 433,
1548 'upload_date': '20130923',
1549 'uploader': 'Amelia Putri Harwita',
1550 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1551 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1552 'formats': 'maxcount:10',
1553 },
1554 'params': {
1555 'skip_download': True,
1556 'youtube_include_dash_manifest': False,
1557 },
1558 'skip': 'not actual anymore',
1559 },
1560 {
1561 # Youtube Music Auto-generated description
1562 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1563 'info_dict': {
1564 'id': 'MgNrAu2pzNs',
1565 'ext': 'mp4',
1566 'title': 'Voyeur Girl',
1567 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1568 'upload_date': '20190312',
1569 'uploader': 'Stephen - Topic',
1570 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1571 'artist': 'Stephen',
1572 'track': 'Voyeur Girl',
1573 'album': 'it\'s too much love to know my dear',
1574 'release_date': '20190313',
1575 'release_year': 2019,
1576 },
1577 'params': {
1578 'skip_download': True,
1579 },
1580 },
1581 {
1582 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1583 'only_matching': True,
1584 },
1585 {
1586 # invalid -> valid video id redirection
1587 'url': 'DJztXj2GPfl',
1588 'info_dict': {
1589 'id': 'DJztXj2GPfk',
1590 'ext': 'mp4',
1591 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1592 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1593 'upload_date': '20090125',
1594 'uploader': 'Prochorowka',
1595 'uploader_id': 'Prochorowka',
1596 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1597 'artist': 'Panjabi MC',
1598 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1599 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1600 },
1601 'params': {
1602 'skip_download': True,
1603 },
1604 'skip': 'Video unavailable',
1605 },
1606 {
1607 # empty description results in an empty string
1608 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1609 'info_dict': {
1610 'id': 'x41yOUIvK2k',
1611 'ext': 'mp4',
1612 'title': 'IMG 3456',
1613 'description': '',
1614 'upload_date': '20170613',
1615 'uploader_id': 'ElevageOrVert',
1616 'uploader': 'ElevageOrVert',
1617 },
1618 'params': {
1619 'skip_download': True,
1620 },
1621 },
1622 {
1623 # with '};' inside yt initial data (see [1])
1624 # see [2] for an example with '};' inside ytInitialPlayerResponse
1625 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1626 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1627 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1628 'info_dict': {
1629 'id': 'CHqg6qOn4no',
1630 'ext': 'mp4',
1631 'title': 'Part 77 Sort a list of simple types in c#',
1632 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1633 'upload_date': '20130831',
1634 'uploader_id': 'kudvenkat',
1635 'uploader': 'kudvenkat',
1636 },
1637 'params': {
1638 'skip_download': True,
1639 },
1640 },
1641 {
1642 # another example of '};' in ytInitialData
1643 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1644 'only_matching': True,
1645 },
1646 {
1647 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1648 'only_matching': True,
1649 },
1650 {
1651 # https://github.com/ytdl-org/youtube-dl/pull/28094
1652 'url': 'OtqTfy26tG0',
1653 'info_dict': {
1654 'id': 'OtqTfy26tG0',
1655 'ext': 'mp4',
1656 'title': 'Burn Out',
1657 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1658 'upload_date': '20141120',
1659 'uploader': 'The Cinematic Orchestra - Topic',
1660 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1661 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1662 'artist': 'The Cinematic Orchestra',
1663 'track': 'Burn Out',
1664 'album': 'Every Day',
1665 'release_date': None,
1666 'release_year': None,
1667 },
1668 'params': {
1669 'skip_download': True,
1670 },
1671 },
1672 {
1673 # controversial video, only works with bpctr when authenticated with cookies
1674 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1675 'only_matching': True,
1676 },
1677 {
1678 # controversial video, requires bpctr/contentCheckOk
1679 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1680 'info_dict': {
1681 'id': 'SZJvDhaSDnc',
1682 'ext': 'mp4',
1683 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1684 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1685 'uploader': 'CBS This Morning',
1686 'uploader_id': 'CBSThisMorning',
1687 'upload_date': '20140716',
1688 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1689 }
1690 },
1691 {
1692 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1693 'url': 'cBvYw8_A0vQ',
1694 'info_dict': {
1695 'id': 'cBvYw8_A0vQ',
1696 'ext': 'mp4',
1697 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1698 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1699 'upload_date': '20201120',
1700 'uploader': 'Walk around Japan',
1701 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1702 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1703 },
1704 'params': {
1705 'skip_download': True,
1706 },
1707 }, {
1708 # Has multiple audio streams
1709 'url': 'WaOKSUlf4TM',
1710 'only_matching': True
1711 }, {
1712 # Requires Premium: has format 141 when requested using YTM url
1713 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1714 'only_matching': True
1715 }, {
1716 # multiple subtitles with same lang_code
1717 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1718 'only_matching': True,
1719 }, {
1720 # Force use android client fallback
1721 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1722 'info_dict': {
1723 'id': 'YOelRv7fMxY',
1724 'title': 'DIGGING A SECRET TUNNEL Part 1',
1725 'ext': '3gp',
1726 'upload_date': '20210624',
1727 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1728 'uploader': 'colinfurze',
1729 'uploader_id': 'colinfurze',
1730 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1731 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1732 },
1733 'params': {
1734 'format': '17', # 3gp format available on android
1735 'extractor_args': {'youtube': {'player_client': ['android']}},
1736 },
1737 },
1738 {
1739 # Skip download of additional client configs (remix client config in this case)
1740 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1741 'only_matching': True,
1742 'params': {
1743 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1744 },
1745 }, {
1746 # shorts
1747 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1748 'only_matching': True,
1749 }, {
1750 'note': 'Storyboards',
1751 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1752 'info_dict': {
1753 'id': '5KLPxDtMqe8',
1754 'ext': 'mhtml',
1755 'format_id': 'sb0',
1756 'title': 'Your Brain is Plastic',
1757 'uploader_id': 'scishow',
1758 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1759 'upload_date': '20140324',
1760 'uploader': 'SciShow',
1761 }, 'params': {'format': 'mhtml', 'skip_download': True}
1762 }
1763 ]
1764
1765 @classmethod
1766 def suitable(cls, url):
1767 from ..utils import parse_qs
1768
1769 qs = parse_qs(url)
1770 if qs.get('list', [None])[0]:
1771 return False
1772 return super(YoutubeIE, cls).suitable(url)
1773
1774 def __init__(self, *args, **kwargs):
1775 super(YoutubeIE, self).__init__(*args, **kwargs)
1776 self._code_cache = {}
1777 self._player_cache = {}
1778
1779 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
1780 lock = threading.Lock()
1781
1782 is_live = True
1783 start_time = time.time()
1784 formats = [f for f in formats if f.get('is_from_start')]
1785
1786 def refetch_manifest(format_id, delay):
1787 nonlocal formats, start_time, is_live
1788 if time.time() <= start_time + delay:
1789 return
1790
1791 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
1792 video_details = traverse_obj(
1793 prs, (..., 'videoDetails'), expected_type=dict, default=[])
1794 microformats = traverse_obj(
1795 prs, (..., 'microformat', 'playerMicroformatRenderer'),
1796 expected_type=dict, default=[])
1797 _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
1798 start_time = time.time()
1799
1800 def mpd_feed(format_id, delay):
1801 """
1802 @returns (manifest_url, manifest_stream_number, is_live) or None
1803 """
1804 with lock:
1805 refetch_manifest(format_id, delay)
1806
1807 f = next((f for f in formats if f['format_id'] == format_id), None)
1808 if not f:
1809 if not is_live:
1810 self.to_screen(f'{video_id}: Video is no longer live')
1811 else:
1812 self.report_warning(
1813 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
1814 return None
1815 return f['manifest_url'], f['manifest_stream_number'], is_live
1816
1817 for f in formats:
1818 f['protocol'] = 'http_dash_segments_generator'
1819 f['fragments'] = functools.partial(
1820 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
1821
1822 def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
1823 FETCH_SPAN, MAX_DURATION = 5, 432000
1824
1825 mpd_url, stream_number, is_live = None, None, True
1826
1827 begin_index = 0
1828 download_start_time = ctx.get('start') or time.time()
1829
1830 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
1831 if lack_early_segments:
1832 self.report_warning(bug_reports_message(
1833 'Starting download from the last 120 hours of the live stream since '
1834 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
1835 lack_early_segments = True
1836
1837 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
1838 fragments, fragment_base_url = None, None
1839
1840 def _extract_sequence_from_mpd(refresh_sequence):
1841 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
1842 # Obtain the latest sequence number from the MPD's maximum seq value
1843 old_mpd_url = mpd_url
1844 last_error = ctx.pop('last_error', None)
1845 expire_fast = last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
1846 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
1847 or (mpd_url, stream_number, False))
1848 if not refresh_sequence:
1849 if expire_fast and not is_live:
1850 return False, last_seq
1851 elif old_mpd_url == mpd_url:
1852 return True, last_seq
1853 try:
1854 fmts, _ = self._extract_mpd_formats_and_subtitles(
1855 mpd_url, None, note=False, errnote=False, fatal=False)
1856 except ExtractorError:
1857 fmts = None
1858 if not fmts:
1859 no_fragment_score += 1
1860 return False, last_seq
1861 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
1862 fragments = fmt_info['fragments']
1863 fragment_base_url = fmt_info['fragment_base_url']
1864 assert fragment_base_url
1865
1866 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
1867 return True, _last_seq
1868
1869 while is_live:
1870 fetch_time = time.time()
1871 if no_fragment_score > 30:
1872 return
1873 if last_segment_url:
1874 # Obtain the latest sequence number from the "X-Head-Seqnum" header of the last segment
1875 try:
1876 urlh = self._request_webpage(
1877 last_segment_url, None, note=False, errnote=False, fatal=False)
1878 except ExtractorError:
1879 urlh = None
1880 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
1881 if last_seq is None:
1882 no_fragment_score += 1
1883 last_segment_url = None
1884 continue
1885 else:
1886 should_continue, last_seq = _extract_sequence_from_mpd(True)
1887 if not should_continue:
1888 continue
1889
1890 if known_idx > last_seq:
1891 last_segment_url = None
1892 continue
1893
1894 last_seq += 1
1895
1896 if begin_index < 0 and known_idx < 0:
1897 # a negative begin_index means start that many segments before the live edge
1898 known_idx = last_seq + begin_index
1899 if lack_early_segments:
1900 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
1901 try:
1902 for idx in range(known_idx, last_seq):
1903 # do not update the sequence number here, or parts of the stream may be skipped
1904 should_continue, _ = _extract_sequence_from_mpd(False)
1905 if not should_continue:
1906 known_idx = idx - 1
1907 raise ExtractorError('breaking out of outer loop')
1908 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
1909 yield {
1910 'url': last_segment_url,
1911 }
1912 if known_idx == last_seq:
1913 no_fragment_score += 5
1914 else:
1915 no_fragment_score = 0
1916 known_idx = last_seq
1917 except ExtractorError:
1918 continue
1919
1920 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
1921
1922 def _extract_player_url(self, *ytcfgs, webpage=None):
1923 player_url = traverse_obj(
1924 ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
1925 get_all=False, expected_type=compat_str)
1926 if not player_url:
1927 return
1928 if player_url.startswith('//'):
1929 player_url = 'https:' + player_url
1930 elif not re.match(r'https?://', player_url):
1931 player_url = compat_urlparse.urljoin(
1932 'https://www.youtube.com', player_url)
1933 return player_url
1934
1935 def _download_player_url(self, video_id, fatal=False):
1936 res = self._download_webpage(
1937 'https://www.youtube.com/iframe_api',
1938 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1939 if res:
1940 player_version = self._search_regex(
1941 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1942 if player_version:
1943 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1944
1945 def _signature_cache_id(self, example_sig):
1946 """ Return a string representation of a signature """
1947 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
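# Illustrative sketch (not part of the extractor): the cache id is just the dot-joined
# lengths of the signature's dot-separated parts, e.g. for a hypothetical signature
#   _signature_cache_id('ABCDE.FG.HIJ')  # -> '5.2.3'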
1948
1949 @classmethod
1950 def _extract_player_info(cls, player_url):
1951 for player_re in cls._PLAYER_INFO_RE:
1952 id_m = re.search(player_re, player_url)
1953 if id_m:
1954 break
1955 else:
1956 raise ExtractorError('Cannot identify player %r' % player_url)
1957 return id_m.group('id')
1958
1959 def _load_player(self, video_id, player_url, fatal=True):
1960 player_id = self._extract_player_info(player_url)
1961 if player_id not in self._code_cache:
1962 code = self._download_webpage(
1963 player_url, video_id, fatal=fatal,
1964 note='Downloading player ' + player_id,
1965 errnote='Download of %s failed' % player_url)
1966 if code:
1967 self._code_cache[player_id] = code
1968 return self._code_cache.get(player_id)
1969
1970 def _extract_signature_function(self, video_id, player_url, example_sig):
1971 player_id = self._extract_player_info(player_url)
1972
1973 # Read from filesystem cache
1974 func_id = 'js_%s_%s' % (
1975 player_id, self._signature_cache_id(example_sig))
1976 assert os.path.basename(func_id) == func_id
1977
1978 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1979 if cache_spec is not None:
1980 return lambda s: ''.join(s[i] for i in cache_spec)
1981
1982 code = self._load_player(video_id, player_url)
1983 if code:
1984 res = self._parse_sig_js(code)
1985
1986 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1987 cache_res = res(test_string)
1988 cache_spec = [ord(c) for c in cache_res]
1989
1990 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1991 return res
1992
1993 def _print_sig_code(self, func, example_sig):
1994 if not self.get_param('youtube_print_sig_code'):
1995 return
1996
1997 def gen_sig_code(idxs):
1998 def _genslice(start, end, step):
1999 starts = '' if start == 0 else str(start)
2000 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
2001 steps = '' if step == 1 else (':%d' % step)
2002 return 's[%s%s%s]' % (starts, ends, steps)
2003
2004 step = None
2005 # Quell pyflakes warnings - start will be set when step is set
2006 start = '(Never used)'
2007 for i, prev in zip(idxs[1:], idxs[:-1]):
2008 if step is not None:
2009 if i - prev == step:
2010 continue
2011 yield _genslice(start, prev, step)
2012 step = None
2013 continue
2014 if i - prev in [-1, 1]:
2015 step = i - prev
2016 start = prev
2017 continue
2018 else:
2019 yield 's[%d]' % prev
2020 if step is None:
2021 yield 's[%d]' % i
2022 else:
2023 yield _genslice(start, i, step)
2024
2025 test_string = ''.join(map(compat_chr, range(len(example_sig))))
2026 cache_res = func(test_string)
2027 cache_spec = [ord(c) for c in cache_res]
2028 expr_code = ' + '.join(gen_sig_code(cache_spec))
2029 signature_id_tuple = '(%s)' % (
2030 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
2031 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
2032 ' return %s\n') % (signature_id_tuple, expr_code)
2033 self.to_screen('Extracted signature function:\n' + code)
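# For reference, the code printed above has roughly this shape (hypothetical values,
# emitted only when the youtube_print_sig_code param is set):
#   if tuple(len(p) for p in s.split('.')) == (43, 40):
#       return s[2:30] + s[0] + s[31:40]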
2034
2035 def _parse_sig_js(self, jscode):
2036 funcname = self._search_regex(
2037 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2038 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2039 r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
2040 r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
2041 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
2042 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
2043 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
2044 # Obsolete patterns
2045 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2046 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
2047 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2048 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2049 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2050 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2051 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2052 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
2053 jscode, 'Initial JS player signature function name', group='sig')
2054
2055 jsi = JSInterpreter(jscode)
2056 initial_function = jsi.extract_function(funcname)
2057 return lambda s: initial_function([s])
2058
2059 def _decrypt_signature(self, s, video_id, player_url):
2060 """Turn the encrypted s field into a working signature"""
2061
2062 if player_url is None:
2063 raise ExtractorError('Cannot decrypt signature without player_url')
2064
2065 try:
2066 player_id = (player_url, self._signature_cache_id(s))
2067 if player_id not in self._player_cache:
2068 func = self._extract_signature_function(
2069 video_id, player_url, s
2070 )
2071 self._player_cache[player_id] = func
2072 func = self._player_cache[player_id]
2073 self._print_sig_code(func, s)
2074 return func(s)
2075 except Exception as e:
2076 raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
2077
2078 def _decrypt_nsig(self, s, video_id, player_url):
2079 """Turn the encrypted n field into a working signature"""
2080 if player_url is None:
2081 raise ExtractorError('Cannot decrypt nsig without player_url')
2082 if player_url.startswith('//'):
2083 player_url = 'https:' + player_url
2084 elif not re.match(r'https?://', player_url):
2085 player_url = compat_urlparse.urljoin(
2086 'https://www.youtube.com', player_url)
2087
2088 sig_id = ('nsig_value', s)
2089 if sig_id in self._player_cache:
2090 return self._player_cache[sig_id]
2091
2092 try:
2093 player_id = ('nsig', player_url)
2094 if player_id not in self._player_cache:
2095 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
2096 func = self._player_cache[player_id]
2097 self._player_cache[sig_id] = func(s)
2098 self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
2099 return self._player_cache[sig_id]
2100 except Exception as e:
2101 raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
2102
2103 def _extract_n_function_name(self, jscode):
2104 return self._search_regex(
2105 (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
2106 jscode, 'Initial JS player n function name', group='nfunc')
2107
2108 def _extract_n_function(self, video_id, player_url):
2109 player_id = self._extract_player_info(player_url)
2110 func_code = self._downloader.cache.load('youtube-nsig', player_id)
2111
2112 if func_code:
2113 jsi = JSInterpreter(func_code)
2114 else:
2115 jscode = self._load_player(video_id, player_url)
2116 funcname = self._extract_n_function_name(jscode)
2117 jsi = JSInterpreter(jscode)
2118 func_code = jsi.extract_function_code(funcname)
2119 self._downloader.cache.store('youtube-nsig', player_id, func_code)
2120
2121 if self.get_param('youtube_print_sig_code'):
2122 self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
2123
2124 return lambda s: jsi.extract_function_from_code(*func_code)([s])
2125
2126 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2127 """
2128 Extract signatureTimestamp (sts)
2129 Required to tell API what sig/player version is in use.
2130 """
2131 sts = None
2132 if isinstance(ytcfg, dict):
2133 sts = int_or_none(ytcfg.get('STS'))
2134
2135 if not sts:
2136 # Attempt to extract from player
2137 if player_url is None:
2138 error_msg = 'Cannot extract signature timestamp without player_url.'
2139 if fatal:
2140 raise ExtractorError(error_msg)
2141 self.report_warning(error_msg)
2142 return
2143 code = self._load_player(video_id, player_url, fatal=fatal)
2144 if code:
2145 sts = int_or_none(self._search_regex(
2146 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2147 'JS player signature timestamp', group='sts', fatal=fatal))
2148 return sts
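# Illustrative match (hypothetical player code): a snippet such as
#   signatureTimestamp:19123
# in base.js would yield sts = 19123, which is then passed to the player API
# via _generate_player_context() below.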
2149
2150 def _mark_watched(self, video_id, player_responses):
2151 playback_url = get_first(
2152 player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2153 expected_type=url_or_none)
2154 if not playback_url:
2155 self.report_warning('Unable to mark watched')
2156 return
2157 parsed_playback_url = compat_urlparse.urlparse(playback_url)
2158 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2159
2160 # The cpn generation algorithm is reverse engineered from base.js.
2161 # In fact, it works even with a dummy cpn.
2162 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2163 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
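# Minimal standalone sketch of the same idea (assumed helper name, not used anywhere):
#   def _generate_cpn(alphabet=CPN_ALPHABET, length=16):
#       return ''.join(alphabet[random.randint(0, 256) & 63] for _ in range(length))
#   _generate_cpn()  # -> e.g. 'hG3kZ-9QbTr_0aLm' (hypothetical output)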
2164
2165 qs.update({
2166 'ver': ['2'],
2167 'cpn': [cpn],
2168 })
2169 playback_url = compat_urlparse.urlunparse(
2170 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2171
2172 self._download_webpage(
2173 playback_url, video_id, 'Marking watched',
2174 'Unable to mark watched', fatal=False)
2175
2176 @staticmethod
2177 def _extract_urls(webpage):
2178 # Embedded YouTube player
2179 entries = [
2180 unescapeHTML(mobj.group('url'))
2181 for mobj in re.finditer(r'''(?x)
2182 (?:
2183 <iframe[^>]+?src=|
2184 data-video-url=|
2185 <embed[^>]+?src=|
2186 embedSWF\(?:\s*|
2187 <object[^>]+data=|
2188 new\s+SWFObject\(
2189 )
2190 (["\'])
2191 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2192 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2193 \1''', webpage)]
2194
2195 # lazyYT YouTube embed
2196 entries.extend(list(map(
2197 unescapeHTML,
2198 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2199
2200 # Wordpress "YouTube Video Importer" plugin
2201 matches = re.findall(r'''(?x)<div[^>]+
2202 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2203 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2204 entries.extend(m[-1] for m in matches)
2205
2206 return entries
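# Illustrative matches (hypothetical markup): the patterns above pick up embeds such as
#   <iframe src="https://www.youtube.com/embed/xxxxxxxxxxx"></iframe>
#   <div class="lazyYT" data-youtube-id="xxxxxxxxxxx"></div>
# yielding the embed URL or the bare video id respectively.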
2207
2208 @staticmethod
2209 def _extract_url(webpage):
2210 urls = YoutubeIE._extract_urls(webpage)
2211 return urls[0] if urls else None
2212
2213 @classmethod
2214 def extract_id(cls, url):
2215 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2216 if mobj is None:
2217 raise ExtractorError('Invalid URL: %s' % url)
2218 return mobj.group('id')
2219
2220 def _extract_chapters_from_json(self, data, duration):
2221 chapter_list = traverse_obj(
2222 data, (
2223 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2224 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2225 ), expected_type=list)
2226
2227 return self._extract_chapters(
2228 chapter_list,
2229 chapter_time=lambda chapter: float_or_none(
2230 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2231 chapter_title=lambda chapter: traverse_obj(
2232 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2233 duration=duration)
2234
2235 def _extract_chapters_from_engagement_panel(self, data, duration):
2236 content_list = traverse_obj(
2237 data,
2238 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2239 expected_type=list, default=[])
2240 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2241 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2242
2243 return next((
2244 filter(None, (
2245 self._extract_chapters(
2246 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2247 chapter_time, chapter_title, duration)
2248 for contents in content_list
2249 ))), [])
2250
2251 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2252 chapters = []
2253 last_chapter = {'start_time': 0}
2254 for idx, chapter in enumerate(chapter_list or []):
2255 title = chapter_title(chapter)
2256 start_time = chapter_time(chapter)
2257 if start_time is None:
2258 continue
2259 last_chapter['end_time'] = start_time
2260 if start_time < last_chapter['start_time']:
2261 if idx == 1:
2262 chapters.pop()
2263 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2264 else:
2265 self.report_warning(f'Invalid start time for chapter "{title}"')
2266 continue
2267 last_chapter = {'start_time': start_time, 'title': title}
2268 chapters.append(last_chapter)
2269 last_chapter['end_time'] = duration
2270 return chapters
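# Illustrative return value (hypothetical chapters): each entry carries start/end times
# in seconds and a title, with the last end_time clamped to the video duration:
#   [{'start_time': 0.0, 'end_time': 95.0, 'title': 'Intro'},
#    {'start_time': 95.0, 'end_time': 433, 'title': 'Main part'}]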
2271
2272 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2273 return self._parse_json(self._search_regex(
2274 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2275 regex), webpage, name, default='{}'), video_id, fatal=False)
2276
2277 def _extract_comment(self, comment_renderer, parent=None):
2278 comment_id = comment_renderer.get('commentId')
2279 if not comment_id:
2280 return
2281
2282 text = self._get_text(comment_renderer, 'contentText')
2283
2284 # note: timestamp is an estimate calculated from the current time and time_text
2285 timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
2286 author = self._get_text(comment_renderer, 'authorText')
2287 author_id = try_get(comment_renderer,
2288 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2289
2290 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2291 lambda x: x['likeCount']), compat_str)) or 0
2292 author_thumbnail = try_get(comment_renderer,
2293 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2294
2295 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2296 is_favorited = 'creatorHeart' in (try_get(
2297 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2298 return {
2299 'id': comment_id,
2300 'text': text,
2301 'timestamp': timestamp,
2302 'time_text': time_text,
2303 'like_count': votes,
2304 'is_favorited': is_favorited,
2305 'author': author,
2306 'author_id': author_id,
2307 'author_thumbnail': author_thumbnail,
2308 'author_is_uploader': author_is_uploader,
2309 'parent': parent or 'root'
2310 }
2311
2312 def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
2313
2314 get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]
2315
2316 def extract_header(contents):
2317 _continuation = None
2318 for content in contents:
2319 comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
2320 expected_comment_count = parse_count(self._get_text(
2321 comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
2322
2323 if expected_comment_count:
2324 tracker['est_total'] = expected_comment_count
2325 self.to_screen(f'Downloading ~{expected_comment_count} comments')
2326 comment_sort_index = int(get_single_config_arg('comment_sort') != 'top') # 1 = new, 0 = top
2327
2328 sort_menu_item = try_get(
2329 comments_header_renderer,
2330 lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2331 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2332
2333 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2334 if not _continuation:
2335 continue
2336
2337 sort_text = str_or_none(sort_menu_item.get('title'))
2338 if not sort_text:
2339 sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2340 self.to_screen('Sorting comments by %s' % sort_text.lower())
2341 break
2342 return _continuation
2343
2344 def extract_thread(contents):
2345 if not parent:
2346 tracker['current_page_thread'] = 0
2347 for content in contents:
2348 if not parent and tracker['total_parent_comments'] >= max_parents:
2349 yield
2350 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2351 comment_renderer = get_first(
2352 (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
2353 expected_type=dict, default={})
2354
2355 comment = self._extract_comment(comment_renderer, parent)
2356 if not comment:
2357 continue
2358
2359 tracker['running_total'] += 1
2360 tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
2361 yield comment
2362
2363 # Attempt to get the replies
2364 comment_replies_renderer = try_get(
2365 comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2366
2367 if comment_replies_renderer:
2368 tracker['current_page_thread'] += 1
2369 comment_entries_iter = self._comment_entries(
2370 comment_replies_renderer, ytcfg, video_id,
2371 parent=comment.get('id'), tracker=tracker)
2372 for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))):
2373 yield reply_comment
2374
2375 # Keeps track of counts across recursive calls
2376 if not tracker:
2377 tracker = dict(
2378 running_total=0,
2379 est_total=0,
2380 current_page_thread=0,
2381 total_parent_comments=0,
2382 total_reply_comments=0)
2383
2384 # TODO: Deprecated
2385 # YouTube comments have a max depth of 2
2386 max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
2387 if max_depth:
2388 self._downloader.deprecation_warning(
2389 '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
2390 if max_depth == 1 and parent:
2391 return
2392
2393 max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
2394 lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)
2395
2396 continuation = self._extract_continuation(root_continuation_data)
2397 message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
2398 if message and not parent:
2399 self.report_warning(message, video_id=video_id)
2400
2401 response = None
2402 is_first_continuation = parent is None
2403
2404 for page_num in itertools.count(0):
2405 if not continuation:
2406 break
2407 headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
2408 comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
2409 if page_num == 0:
2410 if is_first_continuation:
2411 note_prefix = 'Downloading comment section API JSON'
2412 else:
2413 note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
2414 tracker['current_page_thread'], comment_prog_str)
2415 else:
2416 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2417 ' ' if parent else '', ' replies' if parent else '',
2418 page_num, comment_prog_str)
2419
2420 response = self._extract_response(
2421 item_id=None, query=continuation,
2422 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2423 check_get_keys='onResponseReceivedEndpoints')
2424
2425 continuation_contents = traverse_obj(
2426 response, 'onResponseReceivedEndpoints', expected_type=list, default=[])
2427
2428 continuation = None
2429 for continuation_section in continuation_contents:
2430 continuation_items = traverse_obj(
2431 continuation_section,
2432 (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
2433 get_all=False, expected_type=list) or []
2434 if is_first_continuation:
2435 continuation = extract_header(continuation_items)
2436 is_first_continuation = False
2437 if continuation:
2438 break
2439 continue
2440
2441 for entry in extract_thread(continuation_items):
2442 if not entry:
2443 return
2444 yield entry
2445 continuation = self._extract_continuation({'contents': continuation_items})
2446 if continuation:
2447 break
2448
2449 def _get_comments(self, ytcfg, video_id, contents, webpage):
2450 """Entry for comment extraction"""
2451 def _real_comment_extract(contents):
2452 renderer = next((
2453 item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
2454 if item.get('sectionIdentifier') == 'comment-item-section'), None)
2455 yield from self._comment_entries(renderer, ytcfg, video_id)
2456
2457 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
2458 return itertools.islice(_real_comment_extract(contents), 0, max_comments)
2459
2460 @staticmethod
2461 def _get_checkok_params():
2462 return {'contentCheckOk': True, 'racyCheckOk': True}
2463
2464 @classmethod
2465 def _generate_player_context(cls, sts=None):
2466 context = {
2467 'html5Preference': 'HTML5_PREF_WANTS',
2468 }
2469 if sts is not None:
2470 context['signatureTimestamp'] = sts
2471 return {
2472 'playbackContext': {
2473 'contentPlaybackContext': context
2474 },
2475 **cls._get_checkok_params()
2476 }
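# Illustrative payload produced above (with a hypothetical sts of 19123):
#   {'playbackContext': {'contentPlaybackContext': {
#        'html5Preference': 'HTML5_PREF_WANTS', 'signatureTimestamp': 19123}},
#    'contentCheckOk': True, 'racyCheckOk': True}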
2477
2478 @staticmethod
2479 def _is_agegated(player_response):
2480 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2481 return True
2482
2483 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2484 AGE_GATE_REASONS = (
2485 'confirm your age', 'age-restricted', 'inappropriate', # reason
2486 'age_verification_required', 'age_check_required', # status
2487 )
2488 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2489
2490 @staticmethod
2491 def _is_unplayable(player_response):
2492 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2493
2494 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
2495
2496 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2497 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2498 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2499 headers = self.generate_api_headers(
2500 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
2501
2502 yt_query = {'videoId': video_id}
2503 yt_query.update(self._generate_player_context(sts))
2504 return self._extract_response(
2505 item_id=video_id, ep='player', query=yt_query,
2506 ytcfg=player_ytcfg, headers=headers, fatal=True,
2507 default_client=client,
2508 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2509 ) or None
2510
2511 def _get_requested_clients(self, url, smuggled_data):
2512 requested_clients = []
2513 default = ['android', 'web']
2514 allowed_clients = sorted(
2515 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2516 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2517 for client in self._configuration_arg('player_client'):
2518 if client in allowed_clients:
2519 requested_clients.append(client)
2520 elif client == 'default':
2521 requested_clients.extend(default)
2522 elif client == 'all':
2523 requested_clients.extend(allowed_clients)
2524 else:
2525 self.report_warning(f'Skipping unsupported client {client}')
2526 if not requested_clients:
2527 requested_clients = default
2528
2529 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2530 requested_clients.extend(
2531 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2532
2533 return orderedSet(requested_clients)
2534
2535 def _extract_player_ytcfg(self, client, video_id):
2536 url = {
2537 'web_music': 'https://music.youtube.com',
2538 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2539 }.get(client)
2540 if not url:
2541 return {}
2542 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2543 return self.extract_ytcfg(video_id, webpage) or {}
2544
2545 def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
2546 initial_pr = None
2547 if webpage:
2548 initial_pr = self._extract_yt_initial_variable(
2549 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2550 video_id, 'initial player response')
2551
2552 original_clients = clients
2553 clients = clients[::-1]
2554 prs = []
2555
2556 def append_client(client_name):
2557 if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
2558 clients.append(client_name)
2559
2560 # The Android player_response does not have the microformat data needed for
2561 # extraction of some fields. So we return the initial_pr with its formats
2562 # stripped out, even if the web client was not requested by the user
2563 # See: https://github.com/yt-dlp/yt-dlp/issues/501
2564 if initial_pr:
2565 pr = dict(initial_pr)
2566 pr['streamingData'] = None
2567 prs.append(pr)
2568
2569 last_error = None
2570 tried_iframe_fallback = False
2571 player_url = None
2572 while clients:
2573 client = clients.pop()
2574 player_ytcfg = master_ytcfg if client == 'web' else {}
2575 if 'configs' not in self._configuration_arg('player_skip'):
2576 player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
2577
2578 player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
2579 require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
2580 if 'js' in self._configuration_arg('player_skip'):
2581 require_js_player = False
2582 player_url = None
2583
2584 if not player_url and not tried_iframe_fallback and require_js_player:
2585 player_url = self._download_player_url(video_id)
2586 tried_iframe_fallback = True
2587
2588 try:
2589 pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
2590 client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
2591 except ExtractorError as e:
2592 if last_error:
2593 self.report_warning(last_error)
2594 last_error = e
2595 continue
2596
2597 if pr:
2598 prs.append(pr)
2599
2600 # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
2601 if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
2602 append_client(client.replace('_agegate', '_creator'))
2603 elif self._is_agegated(pr):
2604 append_client(f'{client}_agegate')
2605
2606 if last_error:
2607 if not len(prs):
2608 raise last_error
2609 self.report_warning(last_error)
2610 return prs, player_url
2611
2612 def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2613 itags, stream_ids = {}, []
2614 itag_qualities, res_qualities = {}, {}
2615 q = qualities([
2616 # Normally 'tiny' is the smallest video-only format, but
2617 # audio-only formats with unknown quality may also get tagged as tiny
2618 'tiny',
2619 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2620 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2621 ])
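# Illustrative behaviour (sketch): `qualities` returns a ranking function over the list
# above, so e.g. q('tiny') < q('audio_quality_low') < q('medium') < q('hd1080'), and an
# unknown label ranks below all of them.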
2622 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2623
2624 for fmt in streaming_formats:
2625 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2626 continue
2627
2628 itag = str_or_none(fmt.get('itag'))
2629 audio_track = fmt.get('audioTrack') or {}
2630 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2631 if stream_id in stream_ids:
2632 continue
2633
2634 quality = fmt.get('quality')
2635 height = int_or_none(fmt.get('height'))
2636 if quality == 'tiny' or not quality:
2637 quality = fmt.get('audioQuality', '').lower() or quality
2638 # The 3gp format (17) in the android client has a quality of "small",
2639 # but is actually worse than other formats
2640 if itag == '17':
2641 quality = 'tiny'
2642 if quality:
2643 if itag:
2644 itag_qualities[itag] = quality
2645 if height:
2646 res_qualities[height] = quality
2647 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2648 # (adding `&sq=0` to the URL) and parsing the emsg box to determine the
2649 # number of fragments that would subsequently be requested with `&sq=N`
2650 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2651 continue
2652
2653 fmt_url = fmt.get('url')
2654 if not fmt_url:
2655 sc = compat_parse_qs(fmt.get('signatureCipher'))
2656 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2657 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2658 if not (sc and fmt_url and encrypted_sig):
2659 continue
2660 if not player_url:
2661 continue
2662 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2663 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2664 fmt_url += '&' + sp + '=' + signature
2665
2666 query = parse_qs(fmt_url)
2667 throttled = False
2668 if query.get('n'):
2669 try:
2670 fmt_url = update_url_query(fmt_url, {
2671 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
2672 except ExtractorError as e:
2673 self.report_warning(
2674 f'nsig extraction failed: You may experience throttling for some formats\n'
2675 f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
2676 throttled = True
2677
2678 if itag:
2679 itags[itag] = 'https'
2680 stream_ids.append(stream_id)
2681
2682 tbr = float_or_none(
2683 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2684 dct = {
2685 'asr': int_or_none(fmt.get('audioSampleRate')),
2686 'filesize': int_or_none(fmt.get('contentLength')),
2687 'format_id': itag,
2688 'format_note': join_nonempty(
2689 '%s%s' % (audio_track.get('displayName') or '',
2690 ' (default)' if audio_track.get('audioIsDefault') else ''),
2691 fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
2692 throttled and 'THROTTLED', delim=', '),
2693 'source_preference': -10 if throttled else -1,
2694 'fps': int_or_none(fmt.get('fps')) or None,
2695 'height': height,
2696 'quality': q(quality),
2697 'tbr': tbr,
2698 'url': fmt_url,
2699 'width': int_or_none(fmt.get('width')),
2700 'language': audio_track.get('id', '').split('.')[0],
2701 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
2702 }
2703 mime_mobj = re.match(
2704 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2705 if mime_mobj:
2706 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2707 dct.update(parse_codecs(mime_mobj.group(2)))
2708 no_audio = dct.get('acodec') == 'none'
2709 no_video = dct.get('vcodec') == 'none'
2710 if no_audio:
2711 dct['vbr'] = tbr
2712 if no_video:
2713 dct['abr'] = tbr
2714 if no_audio or no_video:
2715 dct['downloader_options'] = {
2716 # Youtube throttles chunks >~10M
2717 'http_chunk_size': 10485760,
2718 }
2719 if dct.get('ext'):
2720 dct['container'] = dct['ext'] + '_dash'
2721 yield dct
2722
2723 live_from_start = is_live and self.get_param('live_from_start')
2724 skip_manifests = self._configuration_arg('skip')
2725 if not self.get_param('youtube_include_hls_manifest', True):
2726 skip_manifests.append('hls')
2727 get_dash = 'dash' not in skip_manifests and (
2728 not is_live or live_from_start or self._configuration_arg('include_live_dash'))
2729 get_hls = not live_from_start and 'hls' not in skip_manifests
2730
2731 def process_manifest_format(f, proto, itag):
2732 if itag in itags:
2733 if itags[itag] == proto or f'{itag}-{proto}' in itags:
2734 return False
2735 itag = f'{itag}-{proto}'
2736 if itag:
2737 f['format_id'] = itag
2738 itags[itag] = proto
2739
2740 f['quality'] = next((
2741 q(qdict[val])
2742 for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
2743 if val in qdict), -1)
2744 return True
2745
2746 for sd in streaming_data:
2747 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2748 if hls_manifest_url:
2749 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2750 if process_manifest_format(f, 'hls', self._search_regex(
2751 r'/itag/(\d+)', f['url'], 'itag', default=None)):
2752 yield f
2753
2754 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2755 if dash_manifest_url:
2756 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2757 if process_manifest_format(f, 'dash', f['format_id']):
2758 f['filesize'] = int_or_none(self._search_regex(
2759 r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
2760 if live_from_start:
2761 f['is_from_start'] = True
2762
2763 yield f
2764
2765 def _extract_storyboard(self, player_responses, duration):
2766 spec = get_first(
2767 player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
2768 if not spec:
2769 return
2770 base_url = spec.pop()
2771 L = len(spec) - 1
2772 for i, args in enumerate(spec):
2773 args = args.split('#')
2774 counts = list(map(int_or_none, args[:5]))
2775 if len(args) != 8 or not all(counts):
2776 self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
2777 continue
2778 width, height, frame_count, cols, rows = counts
2779 N, sigh = args[6:]
2780
2781 url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
2782 fragment_count = frame_count / (cols * rows)
2783 fragment_duration = duration / fragment_count
2784 yield {
2785 'format_id': f'sb{i}',
2786 'format_note': 'storyboard',
2787 'ext': 'mhtml',
2788 'protocol': 'mhtml',
2789 'acodec': 'none',
2790 'vcodec': 'none',
2791 'url': url,
2792 'width': width,
2793 'height': height,
2794 'fragments': [{
2795 'path': url.replace('$M', str(j)),
2796 'duration': min(fragment_duration, duration - (j * fragment_duration)),
2797 } for j in range(math.ceil(fragment_count))],
2798 }
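# Illustrative spec layout (hypothetical values): the '|'-separated spec begins with the
# base URL, followed by one '#'-separated entry per storyboard level, roughly
#   https://i.ytimg.com/sb/<video_id>/storyboard3_L$L/$N|48#27#100#10#10#0#default#<sigh>|...
# where the first five fields are width#height#frame_count#cols#rows and the last two
# supply the $N replacement and the sigh query value used above.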
2799
2800 def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
2801 webpage = None
2802 if 'webpage' not in self._configuration_arg('player_skip'):
2803 webpage = self._download_webpage(
2804 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2805
2806 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2807
2808 player_responses, player_url = self._extract_player_responses(
2809 self._get_requested_clients(url, smuggled_data),
2810 video_id, webpage, master_ytcfg)
2811
2812 return webpage, master_ytcfg, player_responses, player_url
2813
2814 def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
2815 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2816 is_live = get_first(video_details, 'isLive')
2817 if is_live is None:
2818 is_live = get_first(live_broadcast_details, 'isLiveNow')
2819
2820 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2821 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2822
2823 return live_broadcast_details, is_live, streaming_data, formats
2824
2825 def _real_extract(self, url):
2826 url, smuggled_data = unsmuggle_url(url, {})
2827 video_id = self._match_id(url)
2828
2829 base_url = self.http_scheme() + '//www.youtube.com/'
2830 webpage_url = base_url + 'watch?v=' + video_id
2831
2832 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2833
2834 playability_statuses = traverse_obj(
2835 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2836
2837 trailer_video_id = get_first(
2838 playability_statuses,
2839 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2840 expected_type=str)
2841 if trailer_video_id:
2842 return self.url_result(
2843 trailer_video_id, self.ie_key(), trailer_video_id)
2844
2845 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2846 if webpage else (lambda x: None))
2847
2848 video_details = traverse_obj(
2849 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2850 microformats = traverse_obj(
2851 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2852 expected_type=dict, default=[])
2853 video_title = (
2854 get_first(video_details, 'title')
2855 or self._get_text(microformats, (..., 'title'))
2856 or search_meta(['og:title', 'twitter:title', 'title']))
2857 video_description = get_first(video_details, 'shortDescription')
2858
2859 multifeed_metadata_list = get_first(
2860 player_responses,
2861 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2862 expected_type=str)
2863 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
2864 if self.get_param('noplaylist'):
2865 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2866 else:
2867 entries = []
2868 feed_ids = []
2869 for feed in multifeed_metadata_list.split(','):
2870 # Unquoting should take place before splitting on comma (,) since textual
2871 # fields may contain commas as well (see
2872 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2873 feed_data = compat_parse_qs(
2874 compat_urllib_parse_unquote_plus(feed))
2875
2876 def feed_entry(name):
2877 return try_get(
2878 feed_data, lambda x: x[name][0], compat_str)
2879
2880 feed_id = feed_entry('id')
2881 if not feed_id:
2882 continue
2883 feed_title = feed_entry('title')
2884 title = video_title
2885 if feed_title:
2886 title += ' (%s)' % feed_title
2887 entries.append({
2888 '_type': 'url_transparent',
2889 'ie_key': 'Youtube',
2890 'url': smuggle_url(
2891 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2892 {'force_singlefeed': True}),
2893 'title': title,
2894 })
2895 feed_ids.append(feed_id)
2896 self.to_screen(
2897 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2898 % (', '.join(feed_ids), video_id))
2899 return self.playlist_result(
2900 entries, video_id, video_title, video_description)
2901
2902 live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)
2903
2904 if not formats:
2905 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2906 self.report_drm(video_id)
2907 pemr = get_first(
2908 playability_statuses,
2909 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2910 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2911 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2912 if subreason:
2913 if subreason == 'The uploader has not made this video available in your country.':
2914 countries = get_first(microformats, 'availableCountries')
2915 if not countries:
2916 regions_allowed = search_meta('regionsAllowed')
2917 countries = regions_allowed.split(',') if regions_allowed else None
2918 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2919 reason += f'. {subreason}'
2920 if reason:
2921 self.raise_no_formats(reason, expected=True)
2922
2923 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2924 if not keywords and webpage:
2925 keywords = [
2926 unescapeHTML(m.group('content'))
2927 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2928 for keyword in keywords:
2929 if keyword.startswith('yt:stretch='):
2930 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2931 if mobj:
2932 # NB: float is intentional for forcing float division
2933 w, h = (float(v) for v in mobj.groups())
2934 if w > 0 and h > 0:
2935 ratio = w / h
2936 for f in formats:
2937 if f.get('vcodec') != 'none':
2938 f['stretched_ratio'] = ratio
2939 break
2940 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
2941 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2942 if thumbnail_url:
2943 thumbnails.append({
2944 'url': thumbnail_url,
2945 })
2946 original_thumbnails = thumbnails.copy()
2947
2948 # The best resolution thumbnails sometimes do not appear in the webpage
2949 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2950 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2951 thumbnail_names = [
2952 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
2953 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2954 'mqdefault', 'mq1', 'mq2', 'mq3',
2955 'default', '1', '2', '3'
2956 ]
2957 n_thumbnail_names = len(thumbnail_names)
2958 thumbnails.extend({
2959 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2960 video_id=video_id, name=name, ext=ext,
2961 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2962 } for name in thumbnail_names for ext in ('webp', 'jpg'))
2963 for thumb in thumbnails:
2964 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2965 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2966 self._remove_duplicate_formats(thumbnails)
2967 self._downloader._sort_thumbnails(original_thumbnails)
2968
2969 category = get_first(microformats, 'category') or search_meta('genre')
2970 channel_id = str_or_none(
2971 get_first(video_details, 'channelId')
2972 or get_first(microformats, 'externalChannelId')
2973 or search_meta('channelId'))
2974 duration = int_or_none(
2975 get_first(video_details, 'lengthSeconds')
2976 or get_first(microformats, 'lengthSeconds')
2977 or parse_duration(search_meta('duration'))) or None
2978 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2979
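# Work out live/upcoming status; for finished live streams, the broadcast
# start/end timestamps also give the duration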
2980 live_content = get_first(video_details, 'isLiveContent')
2981 is_upcoming = get_first(video_details, 'isUpcoming')
2982 if is_live is None:
2983 if is_upcoming or live_content is False:
2984 is_live = False
2985 if is_upcoming is None and (live_content or is_live):
2986 is_upcoming = False
2987 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2988 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2989 if not duration and live_end_time and live_start_time:
2990 duration = live_end_time - live_start_time
2991
2992 if is_live and self.get_param('live_from_start'):
2993 self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
2994
2995 formats.extend(self._extract_storyboard(player_responses, duration))
2996
2997 # Source is given priority since formats that throttle are given lower source_preference
2998 # When the throttling issue is fully fixed, remove this
2999 self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
3000
3001 info = {
3002 'id': video_id,
3003 'title': video_title,
3004 'formats': formats,
3005 'thumbnails': thumbnails,
3006 # The best thumbnail that we are sure exists. Prevents unnecessary
3007 # URL checking if the user doesn't care about getting the best possible thumbnail
3008 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
3009 'description': video_description,
3010 'upload_date': unified_strdate(
3011 get_first(microformats, 'uploadDate')
3012 or search_meta('uploadDate')),
3013 'uploader': get_first(video_details, 'author'),
3014 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3015 'uploader_url': owner_profile_url,
3016 'channel_id': channel_id,
3017 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
3018 'duration': duration,
3019 'view_count': int_or_none(
3020 get_first((video_details, microformats), (..., 'viewCount'))
3021 or search_meta('interactionCount')),
3022 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
3023 'age_limit': 18 if (
3024 get_first(microformats, 'isFamilySafe') is False
3025 or search_meta('isFamilyFriendly') == 'false'
3026 or search_meta('og:restrictions:age') == '18+') else 0,
3027 'webpage_url': webpage_url,
3028 'categories': [category] if category else None,
3029 'tags': keywords,
3030 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
3031 'is_live': is_live,
3032 'was_live': (False if is_live or is_upcoming or live_content is False
3033 else None if is_live is None or is_upcoming is None
3034 else live_content),
3035 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
3036 'release_timestamp': live_start_time,
3037 }
3038
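# Build subtitles and automatic captions from the caption tracks and
# translation languages of playerCaptionsTracklistRenderer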
3039 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
3040 if pctr:
3041 def get_lang_code(track):
3042 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3043 or track.get('languageCode'))
3044
3045 # Converted into dicts to remove duplicates
3046 captions = {
3047 get_lang_code(sub): sub
3048 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3049 translation_languages = {
3050 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3051 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3052
3053 def process_language(container, base_url, lang_code, sub_name, query):
3054 lang_subs = container.setdefault(lang_code, [])
3055 for fmt in self._SUBTITLE_FORMATS:
3056 query.update({
3057 'fmt': fmt,
3058 })
3059 lang_subs.append({
3060 'ext': fmt,
3061 'url': update_url_query(base_url, query),
3062 'name': sub_name,
3063 })
3064
3065 subtitles, automatic_captions = {}, {}
3066 for lang_code, caption_track in captions.items():
3067 base_url = caption_track.get('baseUrl')
3068 if not base_url:
3069 continue
3070 lang_name = self._get_text(caption_track, 'name', max_runs=1)
3071 if caption_track.get('kind') != 'asr':
3072 if not lang_code:
3073 continue
3074 process_language(
3075 subtitles, base_url, lang_code, lang_name, {})
3076 if not caption_track.get('isTranslatable'):
3077 continue
3078 for trans_code, trans_name in translation_languages.items():
3079 if not trans_code:
3080 continue
3081 if caption_track.get('kind') != 'asr':
3082 trans_code += f'-{lang_code}'
3083 trans_name += format_field(lang_name, template=' from %s')
3084 process_language(
3085 automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
3086 info['automatic_captions'] = automatic_captions
3087 info['subtitles'] = subtitles
3088
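# start_time/end_time may be passed in the URL query or fragment (start, t, end)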
3089 parsed_url = compat_urllib_parse_urlparse(url)
3090 for component in [parsed_url.fragment, parsed_url.query]:
3091 query = compat_parse_qs(component)
3092 for k, v in query.items():
3093 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3094 d_k += '_time'
3095 if d_k not in info and k in s_ks:
3096 info[d_k] = parse_duration(query[k][0])
3097
3098 # Youtube Music Auto-generated description
3099 if video_description:
3100 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
3101 if mobj:
3102 release_year = mobj.group('release_year')
3103 release_date = mobj.group('release_date')
3104 if release_date:
3105 release_date = release_date.replace('-', '')
3106 if not release_year:
3107 release_year = release_date[:4]
3108 info.update({
3109 'album': mobj.group('album').strip(),
3110 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3111 'track': mobj.group('track').strip(),
3112 'release_date': release_date,
3113 'release_year': int_or_none(release_year),
3114 })
3115
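# Prefer ytInitialData from the webpage; fall back to the 'next' API endpoint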
3116 initial_data = None
3117 if webpage:
3118 initial_data = self._extract_yt_initial_variable(
3119 webpage, self._YT_INITIAL_DATA_RE, video_id,
3120 'yt initial data')
3121 if not initial_data:
3122 query = {'videoId': video_id}
3123 query.update(self._get_checkok_params())
3124 initial_data = self._extract_response(
3125 item_id=video_id, ep='next', fatal=False,
3126 ytcfg=master_ytcfg, query=query,
3127 headers=self.generate_api_headers(ytcfg=master_ytcfg),
3128 note='Downloading initial data API JSON')
3129
3130 try:
3131 # This will error if there is no livechat
3132 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3133 info.setdefault('subtitles', {})['live_chat'] = [{
3134 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3135 'video_id': video_id,
3136 'ext': 'json',
3137 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3138 }]
3139 except (KeyError, IndexError, TypeError):
3140 pass
3141
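# ytInitialData also provides chapters, location, series/episode info,
# like/dislike counts and music metadata (album, artist, track)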
3142 if initial_data:
3143 info['chapters'] = (
3144 self._extract_chapters_from_json(initial_data, duration)
3145 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3146 or None)
3147
3148 contents = try_get(
3149 initial_data,
3150 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3151 list) or []
3152 for content in contents:
3153 vpir = content.get('videoPrimaryInfoRenderer')
3154 if vpir:
3155 stl = vpir.get('superTitleLink')
3156 if stl:
3157 stl = self._get_text(stl)
3158 if try_get(
3159 vpir,
3160 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3161 info['location'] = stl
3162 else:
3163 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3164 if mobj:
3165 info.update({
3166 'series': mobj.group(1),
3167 'season_number': int(mobj.group(2)),
3168 'episode_number': int(mobj.group(3)),
3169 })
3170 for tlb in (try_get(
3171 vpir,
3172 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3173 list) or []):
3174 tbr = tlb.get('toggleButtonRenderer') or {}
3175 for getter, regex in [(
3176 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3177 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3178 lambda x: x['accessibility'],
3179 lambda x: x['accessibilityData']['accessibilityData'],
3180 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3181 label = (try_get(tbr, getter, dict) or {}).get('label')
3182 if label:
3183 mobj = re.match(regex, label)
3184 if mobj:
3185 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3186 break
3187 sbr_tooltip = try_get(
3188 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3189 if sbr_tooltip:
3190 like_count, dislike_count = sbr_tooltip.split(' / ')
3191 info.update({
3192 'like_count': str_to_int(like_count),
3193 'dislike_count': str_to_int(dislike_count),
3194 })
3195 vsir = content.get('videoSecondaryInfoRenderer')
3196 if vsir:
3197 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3198 rows = try_get(
3199 vsir,
3200 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3201 list) or []
3202 multiple_songs = False
3203 for row in rows:
3204 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3205 multiple_songs = True
3206 break
3207 for row in rows:
3208 mrr = row.get('metadataRowRenderer') or {}
3209 mrr_title = mrr.get('title')
3210 if not mrr_title:
3211 continue
3212 mrr_title = self._get_text(mrr, 'title')
3213 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3214 if mrr_title == 'License':
3215 info['license'] = mrr_contents_text
3216 elif not multiple_songs:
3217 if mrr_title == 'Album':
3218 info['album'] = mrr_contents_text
3219 elif mrr_title == 'Artist':
3220 info['artist'] = mrr_contents_text
3221 elif mrr_title == 'Song':
3222 info['track'] = mrr_contents_text
3223
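# Fall back to uploader fields for missing channel fields and mirror
# artist/track into creator/alt_title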
3224 fallbacks = {
3225 'channel': 'uploader',
3226 'channel_id': 'uploader_id',
3227 'channel_url': 'uploader_url',
3228 }
3229 for to, frm in fallbacks.items():
3230 if not info.get(to):
3231 info[to] = info.get(frm)
3232
3233 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3234 v = info.get(s_k)
3235 if v:
3236 info[d_k] = v
3237
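# Derive availability from the privacy flags and any 'Members only',
# 'Premium' or 'Unlisted' badges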
3238 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3239 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3240 is_membersonly = None
3241 is_premium = None
3242 if initial_data and is_private is not None:
3243 is_membersonly = False
3244 is_premium = False
3245 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3246 badge_labels = set()
3247 for content in contents:
3248 if not isinstance(content, dict):
3249 continue
3250 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3251 for badge_label in badge_labels:
3252 if badge_label.lower() == 'members only':
3253 is_membersonly = True
3254 elif badge_label.lower() == 'premium':
3255 is_premium = True
3256 elif badge_label.lower() == 'unlisted':
3257 is_unlisted = True
3258
3259 info['availability'] = self._availability(
3260 is_private=is_private,
3261 needs_premium=is_premium,
3262 needs_subscription=is_membersonly,
3263 needs_auth=info['age_limit'] >= 18,
3264 is_unlisted=None if is_private is None else is_unlisted)
3265
3266 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
3267
3268 self.mark_watched(video_id, player_responses)
3269
3270 return info
3271
3272
3273class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
3274
3275 def _extract_channel_id(self, webpage):
3276 channel_id = self._html_search_meta(
3277 'channelId', webpage, 'channel id', default=None)
3278 if channel_id:
3279 return channel_id
3280 channel_url = self._html_search_meta(
3281 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3282 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3283 'twitter:app:url:googleplay'), webpage, 'channel url')
3284 return self._search_regex(
3285 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
3286 channel_url, 'channel id')
3287
3288 @staticmethod
3289 def _extract_basic_item_renderer(item):
3290 # Modified from _extract_grid_item_renderer
3291 known_basic_renderers = (
3292 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3293 )
3294 for key, renderer in item.items():
3295 if not isinstance(renderer, dict):
3296 continue
3297 elif key in known_basic_renderers:
3298 return renderer
3299 elif key.startswith('grid') and key.endswith('Renderer'):
3300 return renderer
3301
3302 def _grid_entries(self, grid_renderer):
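# Yield a result for each grid item: playlist, video, channel or generic endpoint URL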
3303 for item in grid_renderer['items']:
3304 if not isinstance(item, dict):
3305 continue
3306 renderer = self._extract_basic_item_renderer(item)
3307 if not isinstance(renderer, dict):
3308 continue
3309 title = self._get_text(renderer, 'title')
3310
3311 # playlist
3312 playlist_id = renderer.get('playlistId')
3313 if playlist_id:
3314 yield self.url_result(
3315 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3316 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3317 video_title=title)
3318 continue
3319 # video
3320 video_id = renderer.get('videoId')
3321 if video_id:
3322 yield self._extract_video(renderer)
3323 continue
3324 # channel
3325 channel_id = renderer.get('channelId')
3326 if channel_id:
3327 yield self.url_result(
3328 'https://www.youtube.com/channel/%s' % channel_id,
3329 ie=YoutubeTabIE.ie_key(), video_title=title)
3330 continue
3331 # generic endpoint URL support
3332 ep_url = urljoin('https://www.youtube.com/', try_get(
3333 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3334 compat_str))
3335 if ep_url:
3336 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3337 if ie.suitable(ep_url):
3338 yield self.url_result(
3339 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3340 break
3341
3342 def _shelf_entries_from_content(self, shelf_renderer):
3343 content = shelf_renderer.get('content')
3344 if not isinstance(content, dict):
3345 return
3346 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3347 if renderer:
3348 # TODO: add support for nested playlists so each shelf is processed
3349 # as a separate playlist
3350 # TODO: this includes only the first N items
3351 for entry in self._grid_entries(renderer):
3352 yield entry
3353 renderer = content.get('horizontalListRenderer')
3354 if renderer:
3355 # TODO
3356 pass
3357
3358 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3359 ep = try_get(
3360 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3361 compat_str)
3362 shelf_url = urljoin('https://www.youtube.com', ep)
3363 if shelf_url:
3364 # Skip links to other channels; note that checking for
3365 # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
3366 # will not work
3367 if skip_channels and '/channels?' in shelf_url:
3368 return
3369 title = self._get_text(shelf_renderer, 'title')
3370 yield self.url_result(shelf_url, video_title=title)
3371 # Shelf may not contain a shelf URL; fall back to extraction from content
3372 for entry in self._shelf_entries_from_content(shelf_renderer):
3373 yield entry
3374
3375 def _playlist_entries(self, video_list_renderer):
3376 for content in video_list_renderer['contents']:
3377 if not isinstance(content, dict):
3378 continue
3379 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3380 if not isinstance(renderer, dict):
3381 continue
3382 video_id = renderer.get('videoId')
3383 if not video_id:
3384 continue
3385 yield self._extract_video(renderer)
3386
3387 def _rich_entries(self, rich_grid_renderer):
3388 renderer = try_get(
3389 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3390 video_id = renderer.get('videoId')
3391 if not video_id:
3392 return
3393 yield self._extract_video(renderer)
3394
3395 def _video_entry(self, video_renderer):
3396 video_id = video_renderer.get('videoId')
3397 if video_id:
3398 return self._extract_video(video_renderer)
3399
3400 def _post_thread_entries(self, post_thread_renderer):
3401 post_renderer = try_get(
3402 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3403 if not post_renderer:
3404 return
3405 # video attachment
3406 video_renderer = try_get(
3407 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3408 video_id = video_renderer.get('videoId')
3409 if video_id:
3410 entry = self._extract_video(video_renderer)
3411 if entry:
3412 yield entry
3413 # playlist attachment
3414 playlist_id = try_get(
3415 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3416 if playlist_id:
3417 yield self.url_result(
3418 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3419 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3420 # inline video links
3421 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3422 for run in runs:
3423 if not isinstance(run, dict):
3424 continue
3425 ep_url = try_get(
3426 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3427 if not ep_url:
3428 continue
3429 if not YoutubeIE.suitable(ep_url):
3430 continue
3431 ep_video_id = YoutubeIE._match_id(ep_url)
3432 if video_id == ep_video_id:
3433 continue
3434 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3435
3436 def _post_thread_continuation_entries(self, post_thread_continuation):
3437 contents = post_thread_continuation.get('contents')
3438 if not isinstance(contents, list):
3439 return
3440 for content in contents:
3441 renderer = content.get('backstagePostThreadRenderer')
3442 if not isinstance(renderer, dict):
3443 continue
3444 for entry in self._post_thread_entries(renderer):
3445 yield entry
3446
3447 r''' # unused
3448 def _rich_grid_entries(self, contents):
3449 for content in contents:
3450 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3451 if video_renderer:
3452 entry = self._video_entry(video_renderer)
3453 if entry:
3454 yield entry
3455 '''
3456 def _extract_entries(self, parent_renderer, continuation_list):
3457 # continuation_list is modified in-place with continuation_list = [continuation_token]
3458 continuation_list[:] = [None]
3459 contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3460 for content in contents:
3461 if not isinstance(content, dict):
3462 continue
3463 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3464 if not is_renderer:
3465 renderer = content.get('richItemRenderer')
3466 if renderer:
3467 for entry in self._rich_entries(renderer):
3468 yield entry
3469 continuation_list[0] = self._extract_continuation(parent_renderer)
3470 continue
3471 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3472 for isr_content in isr_contents:
3473 if not isinstance(isr_content, dict):
3474 continue
3475
3476 known_renderers = {
3477 'playlistVideoListRenderer': self._playlist_entries,
3478 'gridRenderer': self._grid_entries,
3479 'shelfRenderer': lambda x: self._shelf_entries(x),
3480 'backstagePostThreadRenderer': self._post_thread_entries,
3481 'videoRenderer': lambda x: [self._video_entry(x)],
3482 'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
3483 'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
3484 }
3485 for key, renderer in isr_content.items():
3486 if key not in known_renderers:
3487 continue
3488 for entry in known_renderers[key](renderer):
3489 if entry:
3490 yield entry
3491 continuation_list[0] = self._extract_continuation(renderer)
3492 break
3493
3494 if not continuation_list[0]:
3495 continuation_list[0] = self._extract_continuation(is_renderer)
3496
3497 if not continuation_list[0]:
3498 continuation_list[0] = self._extract_continuation(parent_renderer)
3499
3500 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
3501 continuation_list = [None]
3502 extract_entries = lambda x: self._extract_entries(x, continuation_list)
3503 tab_content = try_get(tab, lambda x: x['content'], dict)
3504 if not tab_content:
3505 return
3506 parent_renderer = (
3507 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3508 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
3509 for entry in extract_entries(parent_renderer):
3510 yield entry
3511 continuation = continuation_list[0]
3512
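# Follow continuation tokens page by page until no known renderer is returned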
3513 for page_num in itertools.count(1):
3514 if not continuation:
3515 break
3516 headers = self.generate_api_headers(
3517 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
3518 response = self._extract_response(
3519 item_id='%s page %s' % (item_id, page_num),
3520 query=continuation, headers=headers, ytcfg=ytcfg,
3521 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3522
3523 if not response:
3524 break
3525 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
3526 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
3527 visitor_data = self._extract_visitor_data(response) or visitor_data
3528
3529 known_continuation_renderers = {
3530 'playlistVideoListContinuation': self._playlist_entries,
3531 'gridContinuation': self._grid_entries,
3532 'itemSectionContinuation': self._post_thread_continuation_entries,
3533 'sectionListContinuation': extract_entries, # for feeds
3534 }
3535 continuation_contents = try_get(
3536 response, lambda x: x['continuationContents'], dict) or {}
3537 continuation_renderer = None
3538 for key, value in continuation_contents.items():
3539 if key not in known_continuation_renderers:
3540 continue
3541 continuation_renderer = value
3542 continuation_list = [None]
3543 for entry in known_continuation_renderers[key](continuation_renderer):
3544 yield entry
3545 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3546 break
3547 if continuation_renderer:
3548 continue
3549
3550 known_renderers = {
3551 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3552 'gridVideoRenderer': (self._grid_entries, 'items'),
3553 'gridChannelRenderer': (self._grid_entries, 'items'),
3554 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
3555 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
3556 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
3557 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
3558 }
3559 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3560 continuation_items = try_get(
3561 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
3562 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3563 video_items_renderer = None
3564 for key, value in continuation_item.items():
3565 if key not in known_renderers:
3566 continue
3567 video_items_renderer = {known_renderers[key][1]: continuation_items}
3568 continuation_list = [None]
3569 for entry in known_renderers[key][0](video_items_renderer):
3570 yield entry
3571 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
3572 break
3573 if video_items_renderer:
3574 continue
3575 break
3576
3577 @staticmethod
3578 def _extract_selected_tab(tabs):
3579 for tab in tabs:
3580 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3581 if renderer.get('selected') is True:
3582 return renderer
3583 else:
3584 raise ExtractorError('Unable to find selected tab')
3585
3586 @classmethod
3587 def _extract_uploader(cls, data):
3588 uploader = {}
3589 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3590 owner = try_get(
3591 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3592 if owner:
3593 uploader['uploader'] = owner.get('text')
3594 uploader['uploader_id'] = try_get(
3595 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3596 uploader['uploader_url'] = urljoin(
3597 'https://www.youtube.com/',
3598 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3599 return {k: v for k, v in uploader.items() if v is not None}
3600
3601 def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
3602 playlist_id = title = description = channel_url = channel_name = channel_id = None
3603 tags = []
3604
3605 selected_tab = self._extract_selected_tab(tabs)
3606 renderer = try_get(
3607 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3608 if renderer:
3609 channel_name = renderer.get('title')
3610 channel_url = renderer.get('channelUrl')
3611 channel_id = renderer.get('externalId')
3612 else:
3613 renderer = try_get(
3614 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3615
3616 if renderer:
3617 title = renderer.get('title')
3618 description = renderer.get('description', '')
3619 playlist_id = channel_id
3620 tags = renderer.get('keywords', '').split()
3621
3622 thumbnails = (
3623 self._extract_thumbnails(renderer, 'avatar')
3624 or self._extract_thumbnails(
3625 self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
3626 ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail')))
3627
3628 if playlist_id is None:
3629 playlist_id = item_id
3630 if title is None:
3631 title = (
3632 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3633 or playlist_id)
3634 title += format_field(selected_tab, 'title', ' - %s')
3635 title += format_field(selected_tab, 'expandedText', ' - %s')
3636 metadata = {
3637 'playlist_id': playlist_id,
3638 'playlist_title': title,
3639 'playlist_description': description,
3640 'uploader': channel_name,
3641 'uploader_id': channel_id,
3642 'uploader_url': channel_url,
3643 'thumbnails': thumbnails,
3644 'tags': tags,
3645 }
3646 availability = self._extract_availability(data)
3647 if availability:
3648 metadata['availability'] = availability
3649 if not channel_id:
3650 metadata.update(self._extract_uploader(data))
3651 metadata.update({
3652 'channel': metadata['uploader'],
3653 'channel_id': metadata['uploader_id'],
3654 'channel_url': metadata['uploader_url']})
3655 return self.playlist_result(
3656 self._entries(
3657 selected_tab, playlist_id, ytcfg,
3658 self._extract_account_syncid(ytcfg, data),
3659 self._extract_visitor_data(data, ytcfg)),
3660 **metadata)
3661
3662 def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
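# Mixes are effectively endless; keep requesting watch_next pages and stop
# once the first video is seen again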
3663 first_id = last_id = response = None
3664 for page_num in itertools.count(1):
3665 videos = list(self._playlist_entries(playlist))
3666 if not videos:
3667 return
3668 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3669 if start >= len(videos):
3670 return
3671 for video in videos[start:]:
3672 if video['id'] == first_id:
3673 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3674 return
3675 yield video
3676 first_id = first_id or videos[0]['id']
3677 last_id = videos[-1]['id']
3678 watch_endpoint = try_get(
3679 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
3680 headers = self.generate_api_headers(
3681 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3682 visitor_data=self._extract_visitor_data(response, data, ytcfg))
3683 query = {
3684 'playlistId': playlist_id,
3685 'videoId': watch_endpoint.get('videoId') or last_id,
3686 'index': watch_endpoint.get('index') or len(videos),
3687 'params': watch_endpoint.get('params') or 'OAE%3D'
3688 }
3689 response = self._extract_response(
3690 item_id='%s page %d' % (playlist_id, page_num),
3691 query=query, ep='next', headers=headers, ytcfg=ytcfg,
3692 check_get_keys='contents'
3693 )
3694 playlist = try_get(
3695 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3696
3697 def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
3698 title = playlist.get('title') or try_get(
3699 data, lambda x: x['titleText']['simpleText'], compat_str)
3700 playlist_id = playlist.get('playlistId') or item_id
3701
3702 # Delegating everything except mix playlists to regular tab-based playlist URL
3703 playlist_url = urljoin(url, try_get(
3704 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3705 compat_str))
3706 if playlist_url and playlist_url != url:
3707 return self.url_result(
3708 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3709 video_title=title)
3710
3711 return self.playlist_result(
3712 self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
3713 playlist_id=playlist_id, playlist_title=title)
3714
3715 def _extract_availability(self, data):
3716 """
3717 Gets the availability of a given playlist/tab.
3718 Note: Unless YouTube tells us explicitly, we do not assume it is public
3719 @param data: response
3720 """
3721 is_private = is_unlisted = None
3722 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
3723 badge_labels = self._extract_badges(renderer)
3724
3725 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
3726 privacy_dropdown_entries = try_get(
3727 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
3728 for renderer_dict in privacy_dropdown_entries:
3729 is_selected = try_get(
3730 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
3731 if not is_selected:
3732 continue
3733 label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
3734 if label:
3735 badge_labels.add(label.lower())
3736 break
3737
3738 for badge_label in badge_labels:
3739 if badge_label == 'unlisted':
3740 is_unlisted = True
3741 elif badge_label == 'private':
3742 is_private = True
3743 elif badge_label == 'public':
3744 is_unlisted = is_private = False
3745 return self._availability(is_private, False, False, False, is_unlisted)
3746
3747 @staticmethod
3748 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
3749 sidebar_renderer = try_get(
3750 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
3751 for item in sidebar_renderer:
3752 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
3753 if renderer:
3754 return renderer
3755
3756 def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
3757 """
3758 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
3759 """
3760 browse_id = params = None
3761 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
3762 if not renderer:
3763 return
3764 menu_renderer = try_get(
3765 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
3766 for menu_item in menu_renderer:
3767 if not isinstance(menu_item, dict):
3768 continue
3769 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
3770 text = try_get(
3771 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
3772 if not text or text.lower() != 'show unavailable videos':
3773 continue
3774 browse_endpoint = try_get(
3775 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
3776 browse_id = browse_endpoint.get('browseId')
3777 params = browse_endpoint.get('params')
3778 break
3779
3780 headers = self.generate_api_headers(
3781 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3782 visitor_data=self._extract_visitor_data(data, ytcfg))
3783 query = {
3784 'params': params or 'wgYCCAA=',
3785 'browseId': browse_id or 'VL%s' % item_id
3786 }
3787 return self._extract_response(
3788 item_id=item_id, headers=headers, query=query,
3789 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
3790 note='Downloading API JSON with unavailable videos')
3791
3792 def _extract_webpage(self, url, item_id, fatal=True):
3793 retries = self.get_param('extractor_retries', 3)
3794 count = -1
3795 webpage = data = last_error = None
3796 while count < retries:
3797 count += 1
3798 # Sometimes youtube returns a webpage with incomplete ytInitialData
3799 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3800 if last_error:
3801 self.report_warning('%s. Retrying ...' % last_error)
3802 try:
3803 webpage = self._download_webpage(
3804 url, item_id,
3805 note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
3806 data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
3807 except ExtractorError as e:
3808 if isinstance(e.cause, network_exceptions):
3809 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
3810 last_error = error_to_compat_str(e.cause or e.msg)
3811 if count < retries:
3812 continue
3813 if fatal:
3814 raise
3815 self.report_warning(error_to_compat_str(e))
3816 break
3817 else:
3818 try:
3819 self._extract_and_report_alerts(data)
3820 except ExtractorError as e:
3821 if fatal:
3822 raise
3823 self.report_warning(error_to_compat_str(e))
3824 break
3825
3826 if dict_get(data, ('contents', 'currentVideoEndpoint')):
3827 break
3828
3829 last_error = 'Incomplete yt initial data received'
3830 if count >= retries:
3831 if fatal:
3832 raise ExtractorError(last_error)
3833 self.report_warning(last_error)
3834 break
3835
3836 return webpage, data
3837
3838 def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
3839 data = None
3840 if 'webpage' not in self._configuration_arg('skip'):
3841 webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
3842 ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
3843 if not data:
3844 if not ytcfg and self.is_authenticated:
3845 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
3846 if 'authcheck' not in self._configuration_arg('skip') and fatal:
3847 raise ExtractorError(
3848 msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
3849 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
3850 expected=True)
3851 self.report_warning(msg, only_once=True)
3852 data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
3853 return data, ytcfg
3854
3855 def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
3856 headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
3857 resolve_response = self._extract_response(
3858 item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
3859 ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
3860 endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
3861 for ep_key, ep in endpoints.items():
3862 params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
3863 if params:
3864 return self._extract_response(
3865 item_id=item_id, query=params, ep=ep, headers=headers,
3866 ytcfg=ytcfg, fatal=fatal, default_client=default_client,
3867 check_get_keys=('contents', 'currentVideoEndpoint'))
3868 err_note = 'Failed to resolve url (does the playlist exist?)'
3869 if fatal:
3870 raise ExtractorError(err_note, expected=True)
3871 self.report_warning(err_note, item_id)
3872
3873 @staticmethod
3874 def _smuggle_data(entries, data):
3875 for entry in entries:
3876 if data:
3877 entry['url'] = smuggle_url(entry['url'], data)
3878 yield entry
3879
3880 _SEARCH_PARAMS = None
3881
3882 def _search_results(self, query, params=NO_DEFAULT):
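# Page through the search endpoint, reusing _extract_entries for continuations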
3883 data = {'query': query}
3884 if params is NO_DEFAULT:
3885 params = self._SEARCH_PARAMS
3886 if params:
3887 data['params'] = params
3888 continuation_list = [None]
3889 for page_num in itertools.count(1):
3890 data.update(continuation_list[0] or {})
3891 search = self._extract_response(
3892 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
3893 check_get_keys=('contents', 'onResponseReceivedCommands'))
3894 slr_contents = try_get(
3895 search,
3896 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
3897 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
3898 list)
3899 yield from self._extract_entries({'contents': slr_contents}, continuation_list)
3900 if not continuation_list[0]:
3901 break
3902
3903
3904class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
3905 IE_DESC = 'YouTube Tabs'
3906 _VALID_URL = r'''(?x:
3907 https?://
3908 (?:\w+\.)?
3909 (?:
3910 youtube(?:kids)?\.com|
3911 %(invidious)s
3912 )/
3913 (?:
3914 (?P<channel_type>channel|c|user|browse)/|
3915 (?P<not_channel>
3916 feed/|hashtag/|
3917 (?:playlist|watch)\?.*?\blist=
3918 )|
3919 (?!(?:%(reserved_names)s)\b) # Direct URLs
3920 )
3921 (?P<id>[^/?\#&]+)
3922 )''' % {
3923 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3924 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3925 }
3926 IE_NAME = 'youtube:tab'
3927
3928 _TESTS = [{
3929 'note': 'playlists, multipage',
3930 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3931 'playlist_mincount': 94,
3932 'info_dict': {
3933 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3934 'title': 'Игорь Клейнер - Playlists',
3935 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3936 'uploader': 'Игорь Клейнер',
3937 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3938 },
3939 }, {
3940 'note': 'playlists, multipage, different order',
3941 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3942 'playlist_mincount': 94,
3943 'info_dict': {
3944 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3945 'title': 'Игорь Клейнер - Playlists',
3946 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3947 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3948 'uploader': 'Игорь Клейнер',
3949 },
3950 }, {
3951 'note': 'playlists, series',
3952 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3953 'playlist_mincount': 5,
3954 'info_dict': {
3955 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3956 'title': '3Blue1Brown - Playlists',
3957 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3958 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3959 'uploader': '3Blue1Brown',
3960 },
3961 }, {
3962 'note': 'playlists, singlepage',
3963 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3964 'playlist_mincount': 4,
3965 'info_dict': {
3966 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3967 'title': 'ThirstForScience - Playlists',
3968 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3969 'uploader': 'ThirstForScience',
3970 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3971 }
3972 }, {
3973 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3974 'only_matching': True,
3975 }, {
3976 'note': 'basic, single video playlist',
3977 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3978 'info_dict': {
3979 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3980 'uploader': 'Sergey M.',
3981 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3982 'title': 'youtube-dl public playlist',
3983 },
3984 'playlist_count': 1,
3985 }, {
3986 'note': 'empty playlist',
3987 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3988 'info_dict': {
3989 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3990 'uploader': 'Sergey M.',
3991 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3992 'title': 'youtube-dl empty playlist',
3993 },
3994 'playlist_count': 0,
3995 }, {
3996 'note': 'Home tab',
3997 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3998 'info_dict': {
3999 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4000 'title': 'lex will - Home',
4001 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4002 'uploader': 'lex will',
4003 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4004 },
4005 'playlist_mincount': 2,
4006 }, {
4007 'note': 'Videos tab',
4008 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4009 'info_dict': {
4010 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4011 'title': 'lex will - Videos',
4012 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4013 'uploader': 'lex will',
4014 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4015 },
4016 'playlist_mincount': 975,
4017 }, {
4018 'note': 'Videos tab, sorted by popular',
4019 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4020 'info_dict': {
4021 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4022 'title': 'lex will - Videos',
4023 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4024 'uploader': 'lex will',
4025 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4026 },
4027 'playlist_mincount': 199,
4028 }, {
4029 'note': 'Playlists tab',
4030 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4031 'info_dict': {
4032 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4033 'title': 'lex will - Playlists',
4034 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4035 'uploader': 'lex will',
4036 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4037 },
4038 'playlist_mincount': 17,
4039 }, {
4040 'note': 'Community tab',
4041 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4042 'info_dict': {
4043 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4044 'title': 'lex will - Community',
4045 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4046 'uploader': 'lex will',
4047 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4048 },
4049 'playlist_mincount': 18,
4050 }, {
4051 'note': 'Channels tab',
4052 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4053 'info_dict': {
4054 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4055 'title': 'lex will - Channels',
4056 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4057 'uploader': 'lex will',
4058 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4059 },
4060 'playlist_mincount': 12,
4061 }, {
4062 'note': 'Search tab',
4063 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4064 'playlist_mincount': 40,
4065 'info_dict': {
4066 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4067 'title': '3Blue1Brown - Search - linear algebra',
4068 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4069 'uploader': '3Blue1Brown',
4070 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4071 },
4072 }, {
4073 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4074 'only_matching': True,
4075 }, {
4076 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4077 'only_matching': True,
4078 }, {
4079 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4080 'only_matching': True,
4081 }, {
4082 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4083 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4084 'info_dict': {
4085 'title': '29C3: Not my department',
4086 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4087 'uploader': 'Christiaan008',
4088 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4089 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
4090 },
4091 'playlist_count': 96,
4092 }, {
4093 'note': 'Large playlist',
4094 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4095 'info_dict': {
4096 'title': 'Uploads from Cauchemar',
4097 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4098 'uploader': 'Cauchemar',
4099 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
4100 },
4101 'playlist_mincount': 1123,
4102 }, {
4103 'note': 'even larger playlist, 8832 videos',
4104 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4105 'only_matching': True,
4106 }, {
4107 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4108 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4109 'info_dict': {
4110 'title': 'Uploads from Interstellar Movie',
4111 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4112 'uploader': 'Interstellar Movie',
4113 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4114 },
4115 'playlist_mincount': 21,
4116 }, {
4117 'note': 'Playlist with "show unavailable videos" button',
4118 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
4119 'info_dict': {
4120 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
4121 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
4122 'uploader': 'Phim Siêu Nhân Nhật Bản',
4123 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
4124 },
4125 'playlist_mincount': 200,
4126 }, {
4127 'note': 'Playlist with unavailable videos in page 7',
4128 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
4129 'info_dict': {
4130 'title': 'Uploads from BlankTV',
4131 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
4132 'uploader': 'BlankTV',
4133 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
4134 },
4135 'playlist_mincount': 1000,
4136 }, {
4137 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
4138 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4139 'info_dict': {
4140 'title': 'Data Analysis with Dr Mike Pound',
4141 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4142 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4143 'uploader': 'Computerphile',
4144 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
4145 },
4146 'playlist_mincount': 11,
4147 }, {
4148 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4149 'only_matching': True,
4150 }, {
4151 'note': 'Playlist URL that does not actually serve a playlist',
4152 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
4153 'info_dict': {
4154 'id': 'FqZTN594JQw',
4155 'ext': 'webm',
4156 'title': "Smiley's People 01 detective, Adventure Series, Action",
4157 'uploader': 'STREEM',
4158 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
4159 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
4160 'upload_date': '20150526',
4161 'license': 'Standard YouTube License',
4162 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
4163 'categories': ['People & Blogs'],
4164 'tags': list,
4165 'view_count': int,
4166 'like_count': int,
4167 'dislike_count': int,
4168 },
4169 'params': {
4170 'skip_download': True,
4171 },
4172 'skip': 'This video is not available.',
4173 'add_ie': [YoutubeIE.ie_key()],
4174 }, {
4175 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4176 'only_matching': True,
4177 }, {
4178 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4179 'only_matching': True,
4180 }, {
4181 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4182 'info_dict': {
4183 'id': '3yImotZU3tw', # This will keep changing
4184 'ext': 'mp4',
4185 'title': compat_str,
4186 'uploader': 'Sky News',
4187 'uploader_id': 'skynews',
4188 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4189 'upload_date': r're:\d{8}',
4190 'description': compat_str,
4191 'categories': ['News & Politics'],
4192 'tags': list,
4193 'like_count': int,
4194 'dislike_count': int,
4195 },
4196 'params': {
4197 'skip_download': True,
4198 },
4199 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
4200 }, {
4201 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
4202 'info_dict': {
4203 'id': 'a48o2S1cPoo',
4204 'ext': 'mp4',
4205 'title': 'The Young Turks - Live Main Show',
4206 'uploader': 'The Young Turks',
4207 'uploader_id': 'TheYoungTurks',
4208 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
4209 'upload_date': '20150715',
4210 'license': 'Standard YouTube License',
4211 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
4212 'categories': ['News & Politics'],
4213 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
4214 'like_count': int,
4215 'dislike_count': int,
4216 },
4217 'params': {
4218 'skip_download': True,
4219 },
4220 'only_matching': True,
4221 }, {
4222 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
4223 'only_matching': True,
4224 }, {
4225 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
4226 'only_matching': True,
4227 }, {
4228 'note': 'A channel that is not live. Should raise error',
4229 'url': 'https://www.youtube.com/user/numberphile/live',
4230 'only_matching': True,
4231 }, {
4232 'url': 'https://www.youtube.com/feed/trending',
4233 'only_matching': True,
4234 }, {
4235 'url': 'https://www.youtube.com/feed/library',
4236 'only_matching': True,
4237 }, {
4238 'url': 'https://www.youtube.com/feed/history',
4239 'only_matching': True,
4240 }, {
4241 'url': 'https://www.youtube.com/feed/subscriptions',
4242 'only_matching': True,
4243 }, {
4244 'url': 'https://www.youtube.com/feed/watch_later',
4245 'only_matching': True,
4246 }, {
4247 'note': 'Recommended - redirects to home page.',
4248 'url': 'https://www.youtube.com/feed/recommended',
4249 'only_matching': True,
4250 }, {
4251 'note': 'inline playlist with not always working continuations',
4252 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
4253 'only_matching': True,
4254 }, {
4255 'url': 'https://www.youtube.com/course',
4256 'only_matching': True,
4257 }, {
4258 'url': 'https://www.youtube.com/zsecurity',
4259 'only_matching': True,
4260 }, {
4261 'url': 'http://www.youtube.com/NASAgovVideo/videos',
4262 'only_matching': True,
4263 }, {
4264 'url': 'https://www.youtube.com/TheYoungTurks/live',
4265 'only_matching': True,
4266 }, {
4267 'url': 'https://www.youtube.com/hashtag/cctv9',
4268 'info_dict': {
4269 'id': 'cctv9',
4270 'title': '#cctv9',
4271 },
4272 'playlist_mincount': 350,
4273 }, {
4274 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
4275 'only_matching': True,
4276 }, {
4277 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
4278 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4279 'only_matching': True
4280 }, {
4281 'note': '/browse/ should redirect to /channel/',
4282 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
4283 'only_matching': True
4284 }, {
4285 'note': 'VLPL, should redirect to playlist?list=PL...',
4286 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4287 'info_dict': {
4288 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4289 'uploader': 'NoCopyrightSounds',
4290 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
4291 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
4292 'title': 'NCS Releases',
4293 },
4294 'playlist_mincount': 166,
4295 }, {
4296 'note': 'Topic, should redirect to playlist?list=UU...',
4297 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4298 'info_dict': {
4299 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4300 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4301 'title': 'Uploads from Royalty Free Music - Topic',
4302 'uploader': 'Royalty Free Music - Topic',
4303 },
4304 'expected_warnings': [
4305 'A channel/user page was given',
4306 'The URL does not have a videos tab',
4307 ],
4308 'playlist_mincount': 101,
4309 }, {
4310 'note': 'Topic without a UU playlist',
4311 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
4312 'info_dict': {
4313 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
4314 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
4315 },
4316 'expected_warnings': [
4317 'A channel/user page was given',
4318 'The URL does not have a videos tab',
4319 'Falling back to channel URL',
4320 ],
4321 'playlist_mincount': 9,
4322 }, {
4323 'note': 'Youtube music Album',
4324 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
4325 'info_dict': {
4326 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
4327 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
4328 },
4329 'playlist_count': 50,
4330 }, {
4331 'note': 'unlisted single video playlist',
4332 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4333 'info_dict': {
4334 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
4335 'uploader': 'colethedj',
4336 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4337 'title': 'yt-dlp unlisted playlist test',
4338 'availability': 'unlisted'
4339 },
4340 'playlist_count': 1,
4341 }, {
4342 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
4343 'url': 'https://www.youtube.com/feed/recommended',
4344 'info_dict': {
4345 'id': 'recommended',
4346 'title': 'recommended',
4347 },
4348 'playlist_mincount': 50,
4349 'params': {
4350 'skip_download': True,
4351 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4352 },
4353 }, {
4354 'note': 'API Fallback: /videos tab, sorted by oldest first',
4355 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
4356 'info_dict': {
4357 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4358 'title': 'Cody\'sLab - Videos',
4359 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
4360 'uploader': 'Cody\'sLab',
4361 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4362 },
4363 'playlist_mincount': 650,
4364 'params': {
4365 'skip_download': True,
4366 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4367 },
4368 }, {
4369 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
4370 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4371 'info_dict': {
4372 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4373 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4374 'title': 'Uploads from Royalty Free Music - Topic',
4375 'uploader': 'Royalty Free Music - Topic',
4376 },
4377 'expected_warnings': [
4378 'A channel/user page was given',
4379 'The URL does not have a videos tab',
4380 ],
4381 'playlist_mincount': 101,
4382 'params': {
4383 'skip_download': True,
4384 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4385 },
4386 }]
4387
4388 @classmethod
4389 def suitable(cls, url):
4390 return False if YoutubeIE.suitable(url) else super(
4391 YoutubeTabIE, cls).suitable(url)
4392
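# Entry point: music URLs are flagged via smuggled data so the flag also reaches the URLs of the extracted entries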
4393 def _real_extract(self, url):
4394 url, smuggled_data = unsmuggle_url(url, {})
4395 if self.is_music_url(url):
4396 smuggled_data['is_music_url'] = True
4397 info_dict = self.__real_extract(url, smuggled_data)
4398 if info_dict.get('entries'):
4399 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4400 return info_dict
4401
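# _URL_RE splits a URL into the part matched by _VALID_URL (pre), an optional tab path such as /videos (tab, present only for channel-style URLs) and any trailing text (post)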
4402 _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$')
4403
4404 def __real_extract(self, url, smuggled_data):
4405 item_id = self._match_id(url)
4406 url = compat_urlparse.urlunparse(
4407 compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
4408 compat_opts = self.get_param('compat_opts', [])
4409
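# get_mobj re-matches the (possibly rewritten) URL against _URL_RE and normalizes missing groups to '' so the checks below never have to handle None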
4410 def get_mobj(url):
4411 mobj = self._URL_RE.match(url).groupdict()
4412 mobj.update((k, '') for k, v in mobj.items() if v is None)
4413 return mobj
4414
4415 mobj, redirect_warning = get_mobj(url), None
4416 # YouTube returns incomplete data if the tab name is not lowercase
4417 pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
4418 if is_channel:
4419 if smuggled_data.get('is_music_url'):
4420 if item_id[:2] == 'VL': # YouTube Music VL channels have an equivalent playlist
4421 item_id = item_id[2:]
4422 pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
4423 elif item_id[:2] == 'MP': # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
4424 mdata = self._extract_tab_endpoint(
4425 f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
4426 murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
4427 get_all=False, expected_type=compat_str)
4428 if not murl:
4429 raise ExtractorError('Failed to resolve album to playlist')
4430 return self.url_result(murl, ie=YoutubeTabIE.ie_key())
4431 elif mobj['channel_type'] == 'browse': # YouTube Music /browse/ URLs should be changed to /channel/
4432 pre = f'https://www.youtube.com/channel/{item_id}'
4433
4434 if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
4435 # Home URLs should redirect to /videos/
4436 redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
4437 'To download only the videos on the home page, append "/featured" to the URL')
4438 tab = '/videos'
4439
4440 url = ''.join((pre, tab, post))
4441 mobj = get_mobj(url)
4442
4443 # Handle both video/playlist URLs
4444 qs = parse_qs(url)
4445 video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')]
4446
4447 if not video_id and mobj['not_channel'].startswith('watch'):
4448 if not playlist_id:
4449 # If there is neither a video nor a playlist ID, YouTube redirects to the home page, which is undesirable
4450 raise ExtractorError('Unable to recognize tab page')
4451 # Common mistake: https://www.youtube.com/watch?list=playlist_id
4452 self.report_warning(f'A video URL was given without a video ID. Trying to download playlist {playlist_id}')
4453 url = f'https://www.youtube.com/playlist?list={playlist_id}'
4454 mobj = get_mobj(url)
4455
4456 if video_id and playlist_id:
4457 if self.get_param('noplaylist'):
4458 self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
4459 return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
4460 ie=YoutubeIE.ie_key(), video_id=video_id)
4461 self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')
4462
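# Fetch the initial data and ytcfg; the 'API Fallback' tests above exercise the path where the webpage is skipped via extractor-args and only the innertube API is used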
4463 data, ytcfg = self._extract_data(url, item_id)
4464
4465 tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
4466 if tabs:
4467 selected_tab = self._extract_selected_tab(tabs)
4468 tab_name = selected_tab.get('title', '')
4469 if 'no-youtube-channel-redirect' not in compat_opts:
4470 if mobj['tab'] == '/live':
4471 # Live tab should have redirected to the video
4472 raise ExtractorError('The channel is not currently live', expected=True)
4473 if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
4474 redirect_warning = f'The URL does not have a {mobj["tab"][1:]} tab'
4475 if not mobj['not_channel'] and item_id[:2] == 'UC':
4476 # Topic channels don't have /videos. Use the equivalent playlist instead
4477 pl_id = f'UU{item_id[2:]}'
4478 pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
4479 try:
4480 data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
4481 except ExtractorError:
4482 redirect_warning += ' and the playlist redirect gave an error'
4483 else:
4484 item_id, url, tab_name = pl_id, pl_url, mobj['tab'][1:]
4485 redirect_warning += f'. Redirecting to playlist {pl_id} instead'
4486 if tab_name.lower() != mobj['tab'][1:]:
4487 redirect_warning += f'. {tab_name} tab is being downloaded instead'
4488
4489 if redirect_warning:
4490 self.report_warning(redirect_warning)
4491 self.write_debug(f'Final URL: {url}')
4492
4493 # YouTube sometimes provides a button to reload the playlist with unavailable videos included.
4494 if 'no-youtube-unavailable-videos' not in compat_opts:
4495 data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
4496 self._extract_and_report_alerts(data, only_once=True)
4497 tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
4498 if tabs:
4499 return self._extract_from_tabs(item_id, ytcfg, data, tabs)
4500
4501 playlist = traverse_obj(
4502 data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
4503 if playlist:
4504 return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)
4505
4506 video_id = traverse_obj(
4507 data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
4508 if video_id:
4509 if mobj['tab'] != '/live': # live tab is expected to redirect to video
4510 self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
4511 return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
4512 ie=YoutubeIE.ie_key(), video_id=video_id)
4513
4514 raise ExtractorError('Unable to recognize tab page')
4515
4516
4517class YoutubePlaylistIE(InfoExtractor):
4518 IE_DESC = 'YouTube playlists'
4519 _VALID_URL = r'''(?x)(?:
4520 (?:https?://)?
4521 (?:\w+\.)?
4522 (?:
4523 (?:
4524 youtube(?:kids)?\.com|
4525 %(invidious)s
4526 )
4527 /.*?\?.*?\blist=
4528 )?
4529 (?P<id>%(playlist_id)s)
4530 )''' % {
4531 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
4532 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4533 }
4534 IE_NAME = 'youtube:playlist'
4535 _TESTS = [{
4536 'note': 'issue #673',
4537 'url': 'PLBB231211A4F62143',
4538 'info_dict': {
4539 'title': '[OLD]Team Fortress 2 (Class-based LP)',
4540 'id': 'PLBB231211A4F62143',
4541 'uploader': 'Wickydoo',
4542 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
4543 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
4544 },
4545 'playlist_mincount': 29,
4546 }, {
4547 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4548 'info_dict': {
4549 'title': 'YDL_safe_search',
4550 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4551 },
4552 'playlist_count': 2,
4553 'skip': 'This playlist is private',
4554 }, {
4555 'note': 'embedded',
4556 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4557 'playlist_count': 4,
4558 'info_dict': {
4559 'title': 'JODA15',
4560 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4561 'uploader': 'milan',
4562 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
4563 }
4564 }, {
4565 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4566 'playlist_mincount': 654,
4567 'info_dict': {
4568 'title': '2018 Chinese New Singles (11/6 updated)',
4569 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4570 'uploader': 'LBK',
4571 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
4572 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
4573 }
4574 }, {
4575 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
4576 'only_matching': True,
4577 }, {
4578 # music album playlist
4579 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
4580 'only_matching': True,
4581 }]
4582
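# Defer to YoutubeTabIE for URLs it accepts, and bail out on watch URLs carrying a v= parameter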
4583 @classmethod
4584 def suitable(cls, url):
4585 if YoutubeTabIE.suitable(url):
4586 return False
4587 from ..utils import parse_qs # local import so that suitable() stays self-contained for the lazy extractors
4588 qs = parse_qs(url)
4589 if qs.get('v', [None])[0]:
4590 return False
4591 return super(YoutubePlaylistIE, cls).suitable(url)
4592
4593 def _real_extract(self, url):
4594 playlist_id = self._match_id(url)
4595 is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
4596 url = update_url_query(
4597 'https://www.youtube.com/playlist',
4598 parse_qs(url) or {'list': playlist_id})
4599 if is_music_url:
4600 url = smuggle_url(url, {'is_music_url': True})
4601 return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4602
4603
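# youtu.be links that also carry a list= parameter; they are rewritten to a full watch URL and handed over to YoutubeTabIE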
4604class YoutubeYtBeIE(InfoExtractor):
4605 IE_DESC = 'youtu.be'
4606 _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
4607 _TESTS = [{
4608 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
4609 'info_dict': {
4610 'id': 'yeWKywCrFtk',
4611 'ext': 'mp4',
4612 'title': 'Small Scale Baler and Braiding Rugs',
4613 'uploader': 'Backus-Page House Museum',
4614 'uploader_id': 'backuspagemuseum',
4615 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
4616 'upload_date': '20161008',
4617 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
4618 'categories': ['Nonprofits & Activism'],
4619 'tags': list,
4620 'like_count': int,
4621 'dislike_count': int,
4622 },
4623 'params': {
4624 'noplaylist': True,
4625 'skip_download': True,
4626 },
4627 }, {
4628 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
4629 'only_matching': True,
4630 }]
4631
4632 def _real_extract(self, url):
4633 mobj = self._match_valid_url(url)
4634 video_id = mobj.group('id')
4635 playlist_id = mobj.group('playlist_id')
4636 return self.url_result(
4637 update_url_query('https://www.youtube.com/watch', {
4638 'v': video_id,
4639 'list': playlist_id,
4640 'feature': 'youtu.be',
4641 }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4642
4643
4644class YoutubeYtUserIE(InfoExtractor):
4645 IE_DESC = 'YouTube user videos; "ytuser:" prefix'
4646 _VALID_URL = r'ytuser:(?P<id>.+)'
4647 _TESTS = [{
4648 'url': 'ytuser:phihag',
4649 'only_matching': True,
4650 }]
4651
4652 def _real_extract(self, url):
4653 user_id = self._match_id(url)
4654 return self.url_result(
4655 'https://www.youtube.com/user/%s/videos' % user_id,
4656 ie=YoutubeTabIE.ie_key(), video_id=user_id)
4657
4658
4659class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
4660 IE_NAME = 'youtube:favorites'
4661 IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
4662 _VALID_URL = r':ytfav(?:ou?rite)?s?'
4663 _LOGIN_REQUIRED = True
4664 _TESTS = [{
4665 'url': ':ytfav',
4666 'only_matching': True,
4667 }, {
4668 'url': ':ytfavorites',
4669 'only_matching': True,
4670 }]
4671
4672 def _real_extract(self, url):
4673 return self.url_result(
4674 'https://www.youtube.com/playlist?list=LL',
4675 ie=YoutubeTabIE.ie_key())
4676
4677
4678class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
4679 IE_DESC = 'YouTube search'
4680 IE_NAME = 'youtube:search'
4681 _SEARCH_KEY = 'ytsearch'
4682 _SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only
4683 _TESTS = []
4684
4685
4686class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
4687 IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
4688 _SEARCH_KEY = 'ytsearchdate'
4689 IE_DESC = 'YouTube search, newest videos first'
4690 _SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date
4691
4692
4693class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
4694 IE_DESC = 'YouTube search URLs with sorting and filter support'
4695 IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
4696 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(?:.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
4697 _TESTS = [{
4698 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
4699 'playlist_mincount': 5,
4700 'info_dict': {
4701 'id': 'youtube-dl test video',
4702 'title': 'youtube-dl test video',
4703 }
4704 }, {
4705 'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
4706 'playlist_mincount': 5,
4707 'info_dict': {
4708 'id': 'python',
4709 'title': 'python',
4710 }
4712 }, {
4713 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
4714 'only_matching': True,
4715 }]
4716
4717 def _real_extract(self, url):
4718 qs = parse_qs(url)
4719 query = (qs.get('search_query') or qs.get('q'))[0]
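# 'sp' holds YouTube's encoded sort/filter parameters and is forwarded to the search unchanged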
4720 return self.playlist_result(self._search_results(query, qs.get('sp', (None,))[0]), query, query)
4721
4722
4723class YoutubeFeedsInfoExtractor(YoutubeTabIE):
4724 """
4725 Base class for feed extractors.
4726 Subclasses must define the _FEED_NAME attribute.
4727 """
4728 _LOGIN_REQUIRED = True
4729 _TESTS = []
4730
4731 @property
4732 def IE_NAME(self):
4733 return 'youtube:%s' % self._FEED_NAME
4734
4735 def _real_extract(self, url):
4736 return self.url_result(
4737 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
4738 ie=YoutubeTabIE.ie_key())
4739
4740
4741class YoutubeWatchLaterIE(InfoExtractor):
4742 IE_NAME = 'youtube:watchlater'
4743 IE_DESC = 'YouTube watch later list; ":ytwatchlater" keyword (requires cookies)'
4744 _VALID_URL = r':ytwatchlater'
4745 _TESTS = [{
4746 'url': ':ytwatchlater',
4747 'only_matching': True,
4748 }]
4749
4750 def _real_extract(self, url):
4751 return self.url_result(
4752 'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
4753
4754
4755class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
4756 IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
4757 _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
4758 _FEED_NAME = 'recommended'
4759 _LOGIN_REQUIRED = False
4760 _TESTS = [{
4761 'url': ':ytrec',
4762 'only_matching': True,
4763 }, {
4764 'url': ':ytrecommended',
4765 'only_matching': True,
4766 }, {
4767 'url': 'https://youtube.com',
4768 'only_matching': True,
4769 }]
4770
4771
4772class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
4773 IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
4774 _VALID_URL = r':ytsub(?:scription)?s?'
4775 _FEED_NAME = 'subscriptions'
4776 _TESTS = [{
4777 'url': ':ytsubs',
4778 'only_matching': True,
4779 }, {
4780 'url': ':ytsubscriptions',
4781 'only_matching': True,
4782 }]
4783
4784
4785class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
4786 IE_DESC = 'YouTube watch history; ":ythis" keyword (requires cookies)'
4787 _VALID_URL = r':ythis(?:tory)?'
4788 _FEED_NAME = 'history'
4789 _TESTS = [{
4790 'url': ':ythistory',
4791 'only_matching': True,
4792 }]
4793
4794
4795class YoutubeTruncatedURLIE(InfoExtractor):
4796 IE_NAME = 'youtube:truncated_url'
4797 IE_DESC = False # Do not list
4798 _VALID_URL = r'''(?x)
4799 (?:https?://)?
4800 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
4801 (?:watch\?(?:
4802 feature=[a-z_]+|
4803 annotation_id=annotation_[^&]+|
4804 x-yt-cl=[0-9]+|
4805 hl=[^&]*|
4806 t=[0-9]+
4807 )?
4808 |
4809 attribution_link\?a=[^&]+
4810 )
4811 $
4812 '''
4813
4814 _TESTS = [{
4815 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
4816 'only_matching': True,
4817 }, {
4818 'url': 'https://www.youtube.com/watch?',
4819 'only_matching': True,
4820 }, {
4821 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
4822 'only_matching': True,
4823 }, {
4824 'url': 'https://www.youtube.com/watch?feature=foo',
4825 'only_matching': True,
4826 }, {
4827 'url': 'https://www.youtube.com/watch?hl=en-GB',
4828 'only_matching': True,
4829 }, {
4830 'url': 'https://www.youtube.com/watch?t=2372',
4831 'only_matching': True,
4832 }]
4833
4834 def _real_extract(self, url):
4835 raise ExtractorError(
4836 'Did you forget to quote the URL? Remember that & is a meta '
4837 'character in most shells, so you want to put the URL in quotes, '
4838 'like yt-dlp '
4839 '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
4840 'or simply yt-dlp BaW_jenozKc .',
4841 expected=True)
4842
4843
4844class YoutubeClipIE(InfoExtractor):
4845 IE_NAME = 'youtube:clip'
4846 IE_DESC = False # Do not list
4847 _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'
4848
4849 def _real_extract(self, url):
4850 self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
4851 return self.url_result(url, 'Generic')
4852
4853
4854class YoutubeTruncatedIDIE(InfoExtractor):
4855 IE_NAME = 'youtube:truncated_id'
4856 IE_DESC = False # Do not list
4857 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
4858
4859 _TESTS = [{
4860 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
4861 'only_matching': True,
4862 }]
4863
4864 def _real_extract(self, url):
4865 video_id = self._match_id(url)
4866 raise ExtractorError(
4867 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
4868 expected=True)