]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[npr] Make SMIL extraction non-fatal (#2099)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
109dd3b2 6import copy
fe93e2c4 7import datetime
adbc4ec4 8import functools
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
720c3099 12import math
c4417ddb 13import os.path
d77ab8e2 14import random
c5e8d7af 15import re
46383212 16import sys
8a784c74 17import time
e0df6211 18import traceback
adbc4ec4 19import threading
c5e8d7af 20
b05654f0 21from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 22from ..compat import (
edf3e38e 23 compat_chr,
29f7c58a 24 compat_HTTPError,
c5e8d7af 25 compat_parse_qs,
545cc85d 26 compat_str,
7fd002c0 27 compat_urllib_parse_unquote_plus,
15707c7e 28 compat_urllib_parse_urlencode,
7c80519c 29 compat_urllib_parse_urlparse,
7c61bd36 30 compat_urlparse,
4bb4a188 31)
545cc85d 32from ..jsinterp import JSInterpreter
4bb4a188 33from ..utils import (
720c3099 34 bug_reports_message,
c5e8d7af 35 clean_html,
d92f5d5a 36 datetime_from_str,
11f9be09 37 dict_get,
358de58c 38 error_to_compat_str,
c5e8d7af 39 ExtractorError,
2d30521a 40 float_or_none,
11f9be09 41 format_field,
dd27fd17 42 int_or_none,
641ad5d8 43 is_html,
34921b43 44 join_nonempty,
94278f72 45 mimetype2ext,
9c0d7f49 46 network_exceptions,
a6213a49 47 NO_DEFAULT,
11f9be09 48 orderedSet,
6310acf5 49 parse_codecs,
49bd8c66 50 parse_count,
7c80519c 51 parse_duration,
7ea65411 52 parse_iso8601,
4dfbf869 53 parse_qs,
dca3ff4a 54 qualities,
c0ac49bc 55 remove_end,
3995d37d 56 remove_start,
cf7e015f 57 smuggle_url,
dbdaaa23 58 str_or_none,
c93d53f5 59 str_to_int,
f3aa3c3f 60 strftime_or_none,
7c365c21 61 traverse_obj,
556dbe7f 62 try_get,
c5e8d7af
PH
63 unescapeHTML,
64 unified_strdate,
cf7e015f 65 unsmuggle_url,
8bdd16b4 66 update_url_query,
21c340b8 67 url_or_none,
fe93e2c4 68 urljoin,
7c365c21 69 variadic,
c5e8d7af
PH
70)
71
5f6a1245 72
def get_first(obj, keys, **kwargs):
    """Look up `keys` under every element of `obj` and return a single result.

    The lookup path is `keys` prefixed with `...`, and `get_all=False` is
    forced on the underlying traverse_obj call. Any other keyword arguments
    are forwarded to traverse_obj unchanged.
    """
    lookup_path = (..., *variadic(keys))
    return traverse_obj(obj, lookup_path, **kwargs, get_all=False)
75
76
# Static per-client Innertube configuration, keyed by client name.
# Entries may omit INNERTUBE_API_KEY, INNERTUBE_HOST and REQUIRE_JS_PLAYER;
# build_innertube_clients() fills those in with defaults after this literal.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
223
224
def build_innertube_clients():
    """Complete every INNERTUBE_CLIENTS entry in place and add `*_agegate` variants.

    For each client this fills in a default API key, host, REQUIRE_JS_PLAYER
    flag and `hl` language, and assigns a numeric `priority` derived from the
    client's family (the part of its name before the first underscore).
    Each base client additionally gets a deep-copied `<name>_agegate` entry
    with an EMBED client screen and a thirdParty context.
    """
    embed_context = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    primary_clients = ('android', 'web', 'ios', 'mweb')
    rank_of = qualities(primary_clients[::-1])

    # Iterate over a snapshot: agegate entries are inserted into the dict during the loop
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        cfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        cfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        cfg.setdefault('REQUIRE_JS_PLAYER', True)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        family = name.split('_', 1)[0]
        cfg['priority'] = 10 * rank_of(family)

        if name in primary_clients:
            # The base client keeps its priority; its agegate copy ranks just below it
            agegate_cfg = copy.deepcopy(cfg)
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_cfg
            agegate_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            agegate_cfg['priority'] -= 1
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3
249
250
# Executed at import time so that INNERTUBE_CLIENTS carries its defaults,
# priorities and agegate variants before any code below reads it
build_innertube_clients()
252
253
de7f3446 254class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 255 """Provide base functions for Youtube extractors"""
e00eb564 256
# Regex alternation of path names
# NOTE(review): presumably top-level YouTube URL path segments that must not be
# treated as channel/user names - confirm against the extractors that use it
_RESERVED_NAMES = (
    r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
    r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
    r'browse|oembed|get_video_info|iframe_api|s/player|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

# Regex for a playlist ID: a known prefix followed by at least 10 ID characters,
# or one of the fixed IDs RDMM, WL, LL, LM
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

_NETRC_MACHINE = 'youtube'

# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
269
d9190e44
RH
# Hostname regex fragments; YoutubeIE joins them with '|' and interpolates
# the result into its _VALID_URL pattern
_INVIDIOUS_SITES = (
    # invidious-redirect websites
    r'(?:www\.)?redirect\.invidious\.io',
    r'(?:(?:www|dev)\.)?invidio\.us',
    # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
    r'(?:www\.)?invidious\.pussthecat\.org',
    r'(?:www\.)?invidious\.zee\.li',
    r'(?:www\.)?invidious\.ethibox\.fr',
    r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
    # youtube-dl invidious instances list
    r'(?:(?:www|no)\.)?invidiou\.sh',
    r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
    r'(?:www\.)?invidious\.kabi\.tk',
    r'(?:www\.)?invidious\.mastodon\.host',
    r'(?:www\.)?invidious\.zapashcanon\.fr',
    r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
    r'(?:www\.)?invidious\.tinfoil-hat\.net',
    r'(?:www\.)?invidious\.himiko\.cloud',
    r'(?:www\.)?invidious\.reallyancient\.tech',
    r'(?:www\.)?invidious\.tube',
    r'(?:www\.)?invidiou\.site',
    r'(?:www\.)?invidious\.site',
    r'(?:www\.)?invidious\.xyz',
    r'(?:www\.)?invidious\.nixnet\.xyz',
    r'(?:www\.)?invidious\.048596\.xyz',
    r'(?:www\.)?invidious\.drycat\.fr',
    r'(?:www\.)?inv\.skyn3t\.in',
    r'(?:www\.)?tube\.poal\.co',
    r'(?:www\.)?tube\.connect\.cafe',
    r'(?:www\.)?vid\.wxzm\.sx',
    r'(?:www\.)?vid\.mint\.lgbt',
    r'(?:www\.)?vid\.puffyan\.us',
    r'(?:www\.)?yewtu\.be',
    r'(?:www\.)?yt\.elukerio\.org',
    r'(?:www\.)?yt\.lelux\.fi',
    r'(?:www\.)?invidious\.ggc-project\.de',
    r'(?:www\.)?yt\.maisputain\.ovh',
    r'(?:www\.)?ytprivate\.com',
    r'(?:www\.)?invidious\.13ad\.de',
    r'(?:www\.)?invidious\.toot\.koeln',
    r'(?:www\.)?invidious\.fdn\.fr',
    r'(?:www\.)?watch\.nettohikari\.com',
    r'(?:www\.)?invidious\.namazso\.eu',
    r'(?:www\.)?invidious\.silkky\.cloud',
    r'(?:www\.)?invidious\.exonip\.de',
    r'(?:www\.)?invidious\.riverside\.rocks',
    r'(?:www\.)?invidious\.blamefran\.net',
    r'(?:www\.)?invidious\.moomoo\.de',
    r'(?:www\.)?ytb\.trom\.tf',
    r'(?:www\.)?yt\.cyberhost\.uk',
    r'(?:www\.)?kgg2m7yk5aybusll\.onion',
    r'(?:www\.)?qklhadlycap4cnod\.onion',
    r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
    r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
    r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
    r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
    r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
    r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
    r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
    r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
    r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
    r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
)
333
def _login(self):
    """
    Check the login configuration for YouTube.

    Raises a login-required error when _LOGIN_REQUIRED is set and neither the
    'cookiefile' nor the 'cookiesfrombrowser' option is given. Username and
    password logins are not performed; if a username is configured a warning
    pointing to cookie-based login is emitted instead.
    """
    cookie_options = ('cookiefile', 'cookiesfrombrowser')
    if self._LOGIN_REQUIRED and not any(
            self.get_param(option) is not None for option in cookie_options):
        self.raise_login_required(
            'Login details are needed to download this content', method='cookies')

    username, _ = self._get_login_info()
    if not username:
        return
    self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
b2e8bc1b 348
def _initialize_consent(self):
    """Set an accepting CONSENT cookie for .youtube.com unless one is unnecessary.

    Nothing is done when a __Secure-3PSID cookie exists or when the current
    CONSENT cookie already contains 'YES'. Otherwise the numeric id of a
    PENDING consent cookie is reused if present, else a random 3-digit id.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    current = jar.get('CONSENT')
    if current:
        if 'YES' in current.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', current.value, 'consent', default=None)

    pending_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % pending_id)
8d81f3e3 363
def _initialize_pref(self):
    """Rewrite the PREF cookie for .youtube.com with `hl` forced to 'en'.

    Existing PREF values are preserved when the cookie can be parsed as a
    query string; on a parse failure a warning is reported and only `hl` is set.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if pref_cookie:
        try:
            settings = dict(compat_urlparse.parse_qsl(pref_cookie.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    settings['hl'] = 'en'
    self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(settings))
375
def _real_initialize(self):
    """Set up the PREF cookie, then the CONSENT cookie, then run the login check."""
    self._initialize_pref()
    self._initialize_consent()
    self._login()
c5e8d7af 380
# Captures the JSON object assigned to ytInitialData (or window["ytInitialData"])
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
# Captures the JSON object assigned to ytInitialPlayerResponse
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text expected right after the object; appended to the patterns above to anchor the non-greedy match
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 384
def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the built-in config for `client`."""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_cfg)
109dd3b2 387
def _get_innertube_host(self, client='web'):
    """Return the INNERTUBE_HOST configured for `client` in INNERTUBE_CLIENTS."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']
109dd3b2 390
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get on `ytcfg`, falling back to the built-in config of `default_client`.

    The fallback is consulted whenever the value from `ytcfg` is falsy.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
395
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from `ytcfg`, or from the default client's config."""
    name_getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, compat_str, default_client)
109dd3b2 400
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from `ytcfg`, or from the default client's config."""
    version_getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, compat_str, default_client)
109dd3b2 405
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return INNERTUBE_API_KEY from `ytcfg`, or from the default client's config."""
    def key_getter(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, key_getter, compat_str, default_client)
408
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the INNERTUBE_CONTEXT dict from `ytcfg` or the default client, with `hl` set to 'en'.

    NOTE(review): when the context comes from the caller's `ytcfg` it appears
    to be the same dict object, so the `hl` override would modify it in place.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section['hl'] = 'en'
    return context
415
# Cache for the SAPISID cookie value used by _generate_sapisidhash_header:
# None = not looked up yet, False = looked up but no usable cookie found
_SAPISID = None
417
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the `SAPISIDHASH <time>_<sha1>` authorization value for `origin`.

    The SAPISID value is read from the youtube.com cookies on first use and
    cached on `self._SAPISID`. Returns None when no usable cookie exists.
    """
    now = round(time.time())
    if self._SAPISID is None:
        cookie_jar = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        cookie = dict_get(cookie_jar, ('__Secure-3PAPISID', 'SAPISID'))
        if not (cookie and cookie.value):
            # Remember the miss so the cookies are not searched again
            self._SAPISID = False
        else:
            self._SAPISID = cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not cookie_jar.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=now + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{now} {self._SAPISID} {origin}'.encode('utf-8')
    digest = hashlib.sha1(payload).hexdigest()
    return f'SAPISIDHASH {now}_{digest}'
a5c56234
M
442
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` to the Innertube endpoint `ep` and return the parsed JSON.

    @param ep               endpoint name appended to /youtubei/v1/
    @param query            dict merged into the request body next to 'context'
    @param video_id         id passed to _download_json
    @param fatal, note, errnote  forwarded to _download_json
    @param headers          extra headers, applied on top of the generated API headers
    @param context          Innertube context; defaults to the one of `default_client`
    @param api_key          API key; defaults to the one of `default_client`
    @param api_hostname     host to call; defaults to the host of `default_client`
    @param default_client   INNERTUBE_CLIENTS key supplying every default above
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)
    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)
    # The fallback key must come from the same client as the context, headers
    # and host; clients such as the music ones carry their own API key
    request_key = api_key or self._extract_api_key(default_client=default_client)
    return self._download_json(
        'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        query={'key': request_key})
458
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find the ytInitialData JSON in `webpage` and return it parsed.

    The boundary-anchored pattern is tried before the bare one. Returns None
    when nothing matched and `fatal` is false.
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE,
    )
    raw_json = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not raw_json:
        return None
    return self._parse_json(raw_json, item_id, fatal=fatal)
0c148415 465
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    Returns the first SESSION_INDEX among the given ytcfgs that converts to an int, else None.
    See: https://github.com/yt-dlp/yt-dlp/pull/519
    """
    # Lazy generator: later ytcfgs are only inspected if earlier ones yield nothing
    indices = (
        int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX']))
        for cfg in data)
    return next((index for index in indices if index is not None), None)
476
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the ID_TOKEN from `ytcfg`, else search `webpage` for it; None if absent."""
    cfg_token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if cfg_token:
        return cfg_token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
487
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the first DELEGATED_SESSION_ID or channel syncid found, else None
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated:
            return delegated
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
    return None
a1c5d2ca 506
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state

    @params response and/or ytcfg
    @returns the first visitor data string found, else None
    """
    # The three locations are alternatives, so they must reach get_first as a
    # single branching key. Doubled parentheses do not create a nested tuple
    # in Python; without the list wrapper the alternatives were spliced into
    # the lookup path as consecutive steps.
    # NOTE(review): relies on variadic() passing a list through unchanged - confirm
    visitor_data_locations = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_locations], expected_type=str)
ac56cf38 516
@property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated from the cookies."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
520
def extract_ytcfg(self, video_id, webpage):
    """Parse the object passed to `ytcfg.set(...)` in `webpage`.

    Returns an empty dict when there is no webpage, no match, or the JSON
    cannot be parsed.
    """
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed or {}
528
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an Innertube API request.

    Explicit arguments win over values extracted from `ytcfg`; client name
    and version fall back to `default_client`. Authorization and X-Origin
    are only added when a SAPISIDHASH can be generated. Headers whose value
    is None are dropped from the result.
    """
    host = api_hostname or self._get_innertube_host(default_client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # With a syncid but no known session index, account 0 is assumed
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin

    return {name: value for name, value in headers.items() if value is not None}
29f7c58a 553
2d6659b9 554 @staticmethod
555 def _build_api_continuation_query(continuation, ctp=None):
556 query = {
557 'continuation': continuation
558 }
559 # TODO: Inconsistency with clickTrackingParams.
560 # Currently we have a fixed ctp contained within context (from ytcfg)
561 # and a ctp in root query for continuation.
562 if ctp:
563 query['clickTracking'] = {'clickTrackingParams': ctp}
564 return query
565
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from next/reload continuation data in `renderer`.

    Returns None when neither form is present or it carries no token.
    """
    continuation_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, continuation_getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
2d6659b9 578
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None for a non-dict input or when the endpoint has no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
2d6659b9 588
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for `renderer`, or None if there is none.

    Next/reload continuation data on the renderer itself is preferred;
    otherwise the first continuationItemRenderer among its 'contents' and
    'items' that yields a token is used.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    children = [
        child
        for list_key in ('contents', 'items')
        for child in try_get(renderer, lambda x, k=list_key: x[k], list) or []]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for child in children:
        if not isinstance(child, dict):
            continue
        endpoint = try_get(child, endpoint_getters, dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query
    return None
609
@classmethod
def _extract_alerts(cls, data):
    """Yield `(alert_type, message)` for every alert in `data['alerts']`.

    Entries that are not dicts, have no 'type', or have no extractable text
    are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Guard the renderer value the same way as its container: a
            # non-dict value would otherwise raise AttributeError on .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
622
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """Report alerts as warnings and raise on the last error-type alert.

    Alerts of type 'error' (case-insensitive) count as errors only when
    `fatal` is true. All alerts but the last error are reported as warnings;
    the last error is raised as an ExtractorError.
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        is_error = alert_type.lower() == 'error' and fatal
        (errors if is_error else warnings).append([alert_type, alert_message])

    to_warn = warnings + errors[:-1]
    for alert_type, alert_message in to_warn:
        self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)

    if not errors:
        return
    raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
636
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and pass them to _report_alerts with the extra arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
639
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadata badge labels found in `renderer['badges']`."""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}
647
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found at any of the given paths in `data`.

    @param data       object to search
    @param path_list  traverse_obj paths tried in order; with no paths, `data` itself is used
    @param max_runs   if truthy, only the first `max_runs` runs are joined
    @returns the 'simpleText' string, or the concatenated 'text' of the 'runs'; None if nothing is found
    """
    for path in path_list or [None]:
        if path is None:
            obj = [data]
        else:
            obj = traverse_obj(data, path, default=[])
            # A path without `...` or a list/tuple key is treated as yielding a
            # single object, so wrap it to share the loop below
            if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                obj = [obj]
        for item in obj:
            text = try_get(item, lambda x: x['simpleText'], compat_str)
            if text:
                return text
            runs = try_get(item, lambda x: x['runs'], list) or []
            # The item may itself be the list of runs
            if not runs and isinstance(item, list):
                runs = item

            # max_runs of None or 0 keeps every run
            runs = runs[:min(len(runs), max_runs or len(runs))]
            text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if text:
                return text
47193e02 669
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)'
    Returns None when the text has no such expression or it cannot be converted
    """
    match = re.search(
        r'(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not match:
        return None
    amount, unit = match.group('time', 'unit')
    try:
        return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
    except ValueError:
        return None
682
def _extract_time_text(self, renderer, *path_list):
    """Return `(timestamp, text)` for a relative-time text inside `renderer`.

    `text` is the string found at the given paths ('' if none); `timestamp`
    is its UNIX time or None. A one-time warning is reported when text exists
    but cannot be parsed.
    """
    text = self._get_text(renderer, *path_list) or ''
    parsed = self.extract_relative_time(text)
    if isinstance(parsed, datetime.datetime):
        timestamp = calendar.timegm(parsed.timetuple())
    else:
        timestamp = None
    if text and timestamp is None:
        self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
    return timestamp, text
692
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the Innertube API with retries and return the JSON response.

    @param item_id         id used for logging and JSON parsing
    @param query           request body fields, passed to _call_api
    @param note            progress note; a retry counter is appended on retries
    @param headers         extra request headers
    @param ytcfg           config used for the context and API key
    @param check_get_keys  keys of which at least one must hold a value in the
                           response; otherwise the data counts as incomplete
    @param ep              API endpoint name
    @param fatal           raise on failure instead of warning and returning None
    @param api_hostname    API host override
    @param default_client  INNERTUBE_CLIENTS key used for defaults

    Up to 'extractor_retries' (default 3) retries follow the first attempt.
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            # Always fatal here so that failures surface as ExtractorError and can be retried
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                    # Rewind after peeking at the body, then surface the API's own error message
                    e.cause.seek(0)
                    yt_error = try_get(
                        self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                        lambda x: x['error']['message'], compat_str)
                    if yt_error:
                        self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                # NOTE(review): on the final attempt this `continue` ends the loop and the
                # erroring response appears to be returned without raising - confirm intended
                if 'unknown error' in e.msg.lower():
                    last_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
762
9297939e 763 @staticmethod
764 def is_music_url(url):
765 return re.match(r'https?://music\.youtube\.com/', url) is not None
766
def _extract_video(self, renderer):
    """Convert a video renderer dict into a `url`-type result pointing at YoutubeIE.

    Title, description, duration, view count, uploader, channel id, upload
    date, live status, release timestamp and availability are filled from
    the renderer where present.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')
    duration_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(duration_text)

    raw_view_count = self._get_text(renderer, 'viewCountText') or ''
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', raw_view_count),
        'view count', default=None))

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)

    # A scheduled start wins over a past stream, which wins over a live marker
    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    availability = self._availability(
        needs_premium='premium' in badges, needs_subscription='members only' in badges)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
    }
805
0c148415 806
360e1ca5 807class YoutubeIE(YoutubeBaseInfoExtractor):
# Short human-readable label for this extractor.
# NOTE(review): where it is displayed is not visible in this part of the file.
IE_DESC = 'YouTube'

# Verbose-mode ((?x)) pattern for everything accepted as a reference to a single video.
#
# Structure:
#   * Group 1 wraps the whole scheme/host/path prefix and is optional, so a bare
#     11-character video ID is accepted as well as a full URL.
#   * The prefix is one of: a YouTube-like hostname followed by a path or "v=" query
#     form, a short-link host (youtu.be, vid.plus, ...) followed by "/", or the
#     cleanvideosearch.com "watch?videoId=" form.
#   * The named group "id" captures exactly 11 characters from [0-9A-Za-z_-].
#   * "(?(1).+)?" is a conditional: arbitrary trailing text is only consumed when
#     group 1 matched; a bare ID must be followed directly by "#" or end of string.
#
# The two %(invidious)s placeholders are filled in by the %-formatting below with an
# alternation built from YoutubeBaseInfoExtractor._INVIDIOUS_SITES.  Because the
# template goes through %-formatting, any literal "%" added to it must be written "%%".
# NOTE(review): the site entries are presumably regex fragments already - confirm
# against the definition of _INVIDIOUS_SITES.
_VALID_URL = r"""(?x)^
                 (
                     (?:https?://|//)                                    # http(s):// or protocol-independent URL
                     (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                        (?:www\.)?deturl\.com/www\.youtube\.com|
                        (?:www\.)?pwnyoutube\.com|
                        (?:www\.)?hooktube\.com|
                        (?:www\.)?yourepeat\.com|
                        tube\.majestyc\.net|
                        %(invidious)s|
                        youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                     (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                     (?:                                                  # the various things that can precede the ID:
                         (?:(?:v|embed|e|shorts)/(?!videoseries))         # v/ or embed/ or e/ or shorts/
                         |(?:                                             # or the v= param in all its forms
                             (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                             (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                             (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                             v=
                         )
                     ))
                     |(?:
                        youtu\.be|                                        # just youtu.be/xxxx
                        vid\.plus|                                        # or vid.plus/xxxx
                        zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                        %(invidious)s
                     )/
                     |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                     )
                 )?                                                       # all until now is optional -> you can pass the naked ID
                 (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                 (?(1).+)?                                                # if we found the ID, everything can follow
                 (?:\#|$)""" % {
    'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
}
# Patterns that each capture a player identifier in the named group "id":
#   1. ".../s/player/<id>/player..." paths (id is at least 8 characters),
#   2. ".../<id>/player_ias.vflset[/<locale>]/base.js" and the
#      "player-plasma-ias-(phone|tablet)-<locale>.vflset/base.js" variants,
#   3. "vfl..."-style identifiers anywhere before a trailing ".js".
# NOTE(review): presumably matched against the player JavaScript URL and tried in
# this order - the consuming code is not visible here, confirm before reordering.
_PLAYER_INFO_RE = (
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
    r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
# Static table of known format properties, keyed by itag (as a string).
# Each value is a dict of whatever is known up front for that itag: container
# extension, dimensions, codecs, audio bitrate ('abr'), frame rate, a descriptive
# 'format_note' and, for some groups, a negative 'preference'.
# Keys are omitted where the value is not fixed for the itag (see the notes on
# itags 36 and 138 below).
# NOTE(review): how this table is merged with data reported by the site is not
# visible in this part of the file.
_formats = {
    # Legacy muxed (audio + video) formats
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

    # 3D videos (all carry 'preference': -20)
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming (all carry 'preference': -10)
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video (video-only entries: no 'acodec' is listed)
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # DASH mp4 audio (audio-only entries: no 'vcodec' is listed)
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # DASH webm video (vp8 entries first, then vp9)
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # DASH webm audio (vorbis)
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # DASH webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed): the only non-numeric key in the table
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
    '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
    '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
    '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
    '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}
# Subtitle format identifiers known to this extractor.
# NOTE(review): presumably the formats requested for each subtitle track, in this
# order - the consuming code is not visible here.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# Geo-bypass flag, switched off for this extractor.
# NOTE(review): the flag is presumably read by the InfoExtractor base class - confirm there.
_GEO_BYPASS = False

# Extractor name.
# NOTE(review): presumably the identifier shown to users and used for selection - confirm in the base class.
IE_NAME = 'youtube'
2eb88d95
PH
963 _TESTS = [
964 {
2d3d2997 965 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
966 'info_dict': {
967 'id': 'BaW_jenozKc',
968 'ext': 'mp4',
3867038a 969 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
970 'uploader': 'Philipp Hagemeister',
971 'uploader_id': 'phihag',
ec85ded8 972 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 973 'channel': 'Philipp Hagemeister',
dd4c4492
S
974 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
975 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 976 'upload_date': '20121002',
ff9f925b 977 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 978 'categories': ['Science & Technology'],
3867038a 979 'tags': ['youtube-dl'],
556dbe7f 980 'duration': 10,
dbdaaa23 981 'view_count': int,
3e7c1224 982 'like_count': int,
ff9f925b 983 # 'dislike_count': int,
984 'availability': 'public',
985 'playable_in_embed': True,
986 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
987 'live_status': 'not_live',
988 'age_limit': 0,
7c80519c 989 'start_time': 1,
297a564b 990 'end_time': 9,
2eb88d95 991 }
0e853ca4 992 },
fccd3771 993 {
4bc3a23e
PH
994 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
995 'note': 'Embed-only video (#1746)',
996 'info_dict': {
997 'id': 'yZIXLfi8CZQ',
998 'ext': 'mp4',
999 'upload_date': '20120608',
1000 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1001 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1002 'uploader': 'SET India',
94bfcd23 1003 'uploader_id': 'setindia',
ec85ded8 1004 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1005 'age_limit': 18,
545cc85d 1006 },
1007 'skip': 'Private video',
fccd3771 1008 },
11b56058 1009 {
8bdd16b4 1010 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1011 'note': 'Use the first video ID in the URL',
1012 'info_dict': {
1013 'id': 'BaW_jenozKc',
1014 'ext': 'mp4',
3867038a 1015 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1016 'uploader': 'Philipp Hagemeister',
1017 'uploader_id': 'phihag',
ec85ded8 1018 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1019 'upload_date': '20121002',
3867038a 1020 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1021 'categories': ['Science & Technology'],
3867038a 1022 'tags': ['youtube-dl'],
556dbe7f 1023 'duration': 10,
dbdaaa23 1024 'view_count': int,
11b56058
PM
1025 'like_count': int,
1026 'dislike_count': int,
34a7de29
S
1027 },
1028 'params': {
1029 'skip_download': True,
1030 },
11b56058 1031 },
dd27fd17 1032 {
2d3d2997 1033 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1034 'note': '256k DASH audio (format 141) via DASH manifest',
1035 'info_dict': {
1036 'id': 'a9LDPn-MO4I',
1037 'ext': 'm4a',
1038 'upload_date': '20121002',
1039 'uploader_id': '8KVIDEO',
ec85ded8 1040 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1041 'description': '',
1042 'uploader': '8KVIDEO',
1043 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1044 },
4bc3a23e
PH
1045 'params': {
1046 'youtube_include_dash_manifest': True,
1047 'format': '141',
4919603f 1048 },
de3c7fe0 1049 'skip': 'format 141 not served anymore',
dd27fd17 1050 },
8bdd16b4 1051 # DASH manifest with encrypted signature
1052 {
1053 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1054 'info_dict': {
1055 'id': 'IB3lcPjvWLA',
1056 'ext': 'm4a',
1057 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1058 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1059 'duration': 244,
1060 'uploader': 'AfrojackVEVO',
1061 'uploader_id': 'AfrojackVEVO',
1062 'upload_date': '20131011',
cc2db878 1063 'abr': 129.495,
8bdd16b4 1064 },
1065 'params': {
1066 'youtube_include_dash_manifest': True,
1067 'format': '141/bestaudio[ext=m4a]',
1068 },
1069 },
65c2fde2 1070 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1071 {
65c2fde2 1072 'note': 'Embed allowed age-gate video',
2d3d2997 1073 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1074 'info_dict': {
1075 'id': 'HtVdAasjOgU',
1076 'ext': 'mp4',
1077 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1078 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1079 'duration': 142,
c522adb1
JMF
1080 'uploader': 'The Witcher',
1081 'uploader_id': 'WitcherGame',
ec85ded8 1082 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1083 'upload_date': '20140605',
34952f09 1084 'age_limit': 18,
c522adb1
JMF
1085 },
1086 },
65c2fde2 1087 {
1088 'note': 'Age-gate video with embed allowed in public site',
1089 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1090 'info_dict': {
1091 'id': 'HsUATh_Nc2U',
1092 'ext': 'mp4',
1093 'title': 'Godzilla 2 (Official Video)',
1094 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1095 'upload_date': '20200408',
1096 'uploader_id': 'FlyingKitty900',
1097 'uploader': 'FlyingKitty',
1098 'age_limit': 18,
1099 },
1100 },
1101 {
1102 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1103 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1104 'info_dict': {
1105 'id': 'Tq92D6wQ1mg',
1106 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1107 'ext': 'mp4',
1108 'upload_date': '20191227',
65c2fde2 1109 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1110 'uploader': 'Projekt Melody',
1111 'description': 'md5:17eccca93a786d51bc67646756894066',
1112 'age_limit': 18,
1113 },
1114 },
1115 {
1116 'note': 'Non-Agegated non-embeddable video',
1117 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1118 'info_dict': {
1119 'id': 'MeJVWBSsPAY',
1120 'ext': 'mp4',
1121 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1122 'uploader': 'Herr Lurik',
1123 'uploader_id': 'st3in234',
1124 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1125 'upload_date': '20130730',
1126 },
1127 },
1128 {
1129 'note': 'Non-bypassable age-gated video',
1130 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1131 'only_matching': True,
1132 },
8bdd16b4 1133 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1134 # YouTube Red ad is not captured for creator
1135 {
1136 'url': '__2ABJjxzNo',
1137 'info_dict': {
1138 'id': '__2ABJjxzNo',
1139 'ext': 'mp4',
1140 'duration': 266,
1141 'upload_date': '20100430',
1142 'uploader_id': 'deadmau5',
1143 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1144 'creator': 'deadmau5',
1145 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1146 'uploader': 'deadmau5',
1147 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1148 'alt_title': 'Some Chords',
8bdd16b4 1149 },
1150 'expected_warnings': [
1151 'DASH manifest missing',
1152 ]
1153 },
067aa17e 1154 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1155 {
1156 'url': 'lqQg6PlCWgI',
1157 'info_dict': {
1158 'id': 'lqQg6PlCWgI',
1159 'ext': 'mp4',
556dbe7f 1160 'duration': 6085,
90227264 1161 'upload_date': '20150827',
cbe2bd91 1162 'uploader_id': 'olympic',
ec85ded8 1163 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1164 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1165 'uploader': 'Olympics',
cbe2bd91
PH
1166 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1167 },
1168 'params': {
1169 'skip_download': 'requires avconv',
e52a40ab 1170 }
cbe2bd91 1171 },
6271f1ca
PH
1172 # Non-square pixels
1173 {
1174 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1175 'info_dict': {
1176 'id': '_b-2C3KPAM0',
1177 'ext': 'mp4',
1178 'stretched_ratio': 16 / 9.,
556dbe7f 1179 'duration': 85,
6271f1ca
PH
1180 'upload_date': '20110310',
1181 'uploader_id': 'AllenMeow',
ec85ded8 1182 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1183 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1184 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1185 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1186 },
06b491eb
S
1187 },
1188 # url_encoded_fmt_stream_map is empty string
1189 {
1190 'url': 'qEJwOuvDf7I',
1191 'info_dict': {
1192 'id': 'qEJwOuvDf7I',
f57b7835 1193 'ext': 'webm',
06b491eb
S
1194 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1195 'description': '',
1196 'upload_date': '20150404',
1197 'uploader_id': 'spbelect',
1198 'uploader': 'Наблюдатели Петербурга',
1199 },
1200 'params': {
1201 'skip_download': 'requires avconv',
e323cf3f
S
1202 },
1203 'skip': 'This live event has ended.',
06b491eb 1204 },
067aa17e 1205 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1206 {
1207 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1208 'info_dict': {
1209 'id': 'FIl7x6_3R5Y',
eb6793ba 1210 'ext': 'webm',
da77d856
S
1211 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1212 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1213 'duration': 220,
da77d856
S
1214 'upload_date': '20150625',
1215 'uploader_id': 'dorappi2000',
ec85ded8 1216 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1217 'uploader': 'dorappi2000',
eb6793ba 1218 'formats': 'mincount:31',
da77d856 1219 },
eb6793ba 1220 'skip': 'not actual anymore',
2ee8f5d8 1221 },
8a1a26ce
YCH
1222 # DASH manifest with segment_list
1223 {
1224 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1225 'md5': '8ce563a1d667b599d21064e982ab9e31',
1226 'info_dict': {
1227 'id': 'CsmdDsKjzN8',
1228 'ext': 'mp4',
17ee98e1 1229 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1230 'uploader': 'Airtek',
1231 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1232 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1233 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1234 },
1235 'params': {
1236 'youtube_include_dash_manifest': True,
1237 'format': '135', # bestvideo
be49068d
S
1238 },
1239 'skip': 'This live event has ended.',
2ee8f5d8 1240 },
cf7e015f
S
1241 {
1242 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1243 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1244 'info_dict': {
545cc85d 1245 'id': 'jvGDaLqkpTg',
1246 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1247 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1248 },
1249 'playlist': [{
1250 'info_dict': {
545cc85d 1251 'id': 'jvGDaLqkpTg',
cf7e015f 1252 'ext': 'mp4',
545cc85d 1253 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1254 'description': 'md5:e03b909557865076822aa169218d6a5d',
1255 'duration': 10643,
1256 'upload_date': '20161111',
1257 'uploader': 'Team PGP',
1258 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1259 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1260 },
1261 }, {
1262 'info_dict': {
545cc85d 1263 'id': '3AKt1R1aDnw',
cf7e015f 1264 'ext': 'mp4',
545cc85d 1265 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1266 'description': 'md5:e03b909557865076822aa169218d6a5d',
1267 'duration': 10991,
1268 'upload_date': '20161111',
1269 'uploader': 'Team PGP',
1270 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1271 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1272 },
1273 }, {
1274 'info_dict': {
545cc85d 1275 'id': 'RtAMM00gpVc',
cf7e015f 1276 'ext': 'mp4',
545cc85d 1277 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1278 'description': 'md5:e03b909557865076822aa169218d6a5d',
1279 'duration': 10995,
1280 'upload_date': '20161111',
1281 'uploader': 'Team PGP',
1282 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1283 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1284 },
1285 }, {
1286 'info_dict': {
545cc85d 1287 'id': '6N2fdlP3C5U',
cf7e015f 1288 'ext': 'mp4',
545cc85d 1289 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1290 'description': 'md5:e03b909557865076822aa169218d6a5d',
1291 'duration': 10990,
1292 'upload_date': '20161111',
1293 'uploader': 'Team PGP',
1294 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1295 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1296 },
1297 }],
1298 'params': {
1299 'skip_download': True,
1300 },
65c2fde2 1301 'skip': 'Not multifeed anymore',
cbaed4bb 1302 },
f9f49d87 1303 {
067aa17e 1304 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1305 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1306 'info_dict': {
1307 'id': 'gVfLd0zydlo',
1308 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1309 },
1310 'playlist_count': 2,
be49068d 1311 'skip': 'Not multifeed anymore',
f9f49d87 1312 },
cbaed4bb 1313 {
2d3d2997 1314 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1315 'only_matching': True,
0e49d9a6 1316 },
6d4fc66b 1317 {
2d3d2997 1318 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1319 'only_matching': True,
1320 },
0e49d9a6 1321 {
067aa17e 1322 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1323 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1324 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1325 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1326 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1327 'info_dict': {
1328 'id': 'lsguqyKfVQg',
1329 'ext': 'mp4',
1330 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1331 'alt_title': 'Dark Walk',
0e49d9a6 1332 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1333 'duration': 133,
0e49d9a6
LL
1334 'upload_date': '20151119',
1335 'uploader_id': 'IronSoulElf',
ec85ded8 1336 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1337 'uploader': 'IronSoulElf',
11f9be09 1338 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1339 'track': 'Dark Walk',
1340 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1341 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1342 },
1343 'params': {
1344 'skip_download': True,
1345 },
1346 },
61f92af1 1347 {
067aa17e 1348 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1349 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1350 'only_matching': True,
1351 },
313dfc45
LL
1352 {
1353 # Video with yt:stretch=17:0
1354 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1355 'info_dict': {
1356 'id': 'Q39EVAstoRM',
1357 'ext': 'mp4',
1358 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1359 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1360 'upload_date': '20151107',
1361 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1362 'uploader': 'CH GAMER DROID',
1363 },
1364 'params': {
1365 'skip_download': True,
1366 },
be49068d 1367 'skip': 'This video does not exist.',
313dfc45 1368 },
201c1459 1369 {
1370 # Video with incomplete 'yt:stretch=16:'
1371 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1372 'only_matching': True,
1373 },
7caf9830
S
1374 {
1375 # Video licensed under Creative Commons
1376 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1377 'info_dict': {
1378 'id': 'M4gD1WSo5mA',
1379 'ext': 'mp4',
1380 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1381 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1382 'duration': 721,
7caf9830
S
1383 'upload_date': '20150127',
1384 'uploader_id': 'BerkmanCenter',
ec85ded8 1385 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1386 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1387 'license': 'Creative Commons Attribution license (reuse allowed)',
1388 },
1389 'params': {
1390 'skip_download': True,
1391 },
1392 },
fd050249
S
1393 {
1394 # Channel-like uploader_url
1395 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1396 'info_dict': {
1397 'id': 'eQcmzGIKrzg',
1398 'ext': 'mp4',
1399 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1400 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1401 'duration': 4060,
fd050249 1402 'upload_date': '20151119',
eb6793ba 1403 'uploader': 'Bernie Sanders',
fd050249 1404 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1405 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1406 'license': 'Creative Commons Attribution license (reuse allowed)',
1407 },
1408 'params': {
1409 'skip_download': True,
1410 },
1411 },
040ac686
S
1412 {
1413 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1414 'only_matching': True,
7f29cf54
S
1415 },
1416 {
067aa17e 1417 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1418 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1419 'only_matching': True,
6496ccb4
S
1420 },
1421 {
1422 # Rental video preview
1423 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1424 'info_dict': {
1425 'id': 'uGpuVWrhIzE',
1426 'ext': 'mp4',
1427 'title': 'Piku - Trailer',
1428 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1429 'upload_date': '20150811',
1430 'uploader': 'FlixMatrix',
1431 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1433 'license': 'Standard YouTube License',
1434 },
1435 'params': {
1436 'skip_download': True,
1437 },
eb6793ba 1438 'skip': 'This video is not available.',
022a5d66 1439 },
12afdc2a
S
1440 {
1441 # YouTube Red video with episode data
1442 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1443 'info_dict': {
1444 'id': 'iqKdEhx-dD4',
1445 'ext': 'mp4',
1446 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1447 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1448 'duration': 2085,
12afdc2a
S
1449 'upload_date': '20170118',
1450 'uploader': 'Vsauce',
1451 'uploader_id': 'Vsauce',
1452 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1453 'series': 'Mind Field',
1454 'season_number': 1,
1455 'episode_number': 1,
1456 },
1457 'params': {
1458 'skip_download': True,
1459 },
1460 'expected_warnings': [
1461 'Skipping DASH manifest',
1462 ],
1463 },
c7121fa7
S
1464 {
1465 # The following content has been identified by the YouTube community
1466 # as inappropriate or offensive to some audiences.
1467 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1468 'info_dict': {
1469 'id': '6SJNVb0GnPI',
1470 'ext': 'mp4',
1471 'title': 'Race Differences in Intelligence',
1472 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1473 'duration': 965,
1474 'upload_date': '20140124',
1475 'uploader': 'New Century Foundation',
1476 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1477 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1478 },
1479 'params': {
1480 'skip_download': True,
1481 },
545cc85d 1482 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1483 },
022a5d66
S
1484 {
1485 # itag 212
1486 'url': '1t24XAntNCY',
1487 'only_matching': True,
fd5c4aab
S
1488 },
1489 {
1490 # geo restricted to JP
1491 'url': 'sJL6WA-aGkQ',
1492 'only_matching': True,
1493 },
cd5a74a2
S
1494 {
1495 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1496 'only_matching': True,
1497 },
bc2ca1bb 1498 {
1499 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1500 'only_matching': True,
1501 },
1502 {
1503 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1504 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1505 'only_matching': True,
1506 },
825cd268
RA
1507 {
1508 # DRM protected
1509 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1510 'only_matching': True,
4fe54c12
S
1511 },
1512 {
1513 # Video with unsupported adaptive stream type formats
1514 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1515 'info_dict': {
1516 'id': 'Z4Vy8R84T1U',
1517 'ext': 'mp4',
1518 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1519 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1520 'duration': 433,
1521 'upload_date': '20130923',
1522 'uploader': 'Amelia Putri Harwita',
1523 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1524 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1525 'formats': 'maxcount:10',
1526 },
1527 'params': {
1528 'skip_download': True,
1529 'youtube_include_dash_manifest': False,
1530 },
5429d6a9 1531 'skip': 'not actual anymore',
5caabd3c 1532 },
1533 {
822b9d9c 1534 # Youtube Music Auto-generated description
5caabd3c 1535 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1536 'info_dict': {
1537 'id': 'MgNrAu2pzNs',
1538 'ext': 'mp4',
1539 'title': 'Voyeur Girl',
1540 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1541 'upload_date': '20190312',
5429d6a9
S
1542 'uploader': 'Stephen - Topic',
1543 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1544 'artist': 'Stephen',
1545 'track': 'Voyeur Girl',
1546 'album': 'it\'s too much love to know my dear',
1547 'release_date': '20190313',
1548 'release_year': 2019,
1549 },
1550 'params': {
1551 'skip_download': True,
1552 },
1553 },
66b48727
RA
1554 {
1555 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1556 'only_matching': True,
1557 },
011e75e6
S
1558 {
1559 # invalid -> valid video id redirection
1560 'url': 'DJztXj2GPfl',
1561 'info_dict': {
1562 'id': 'DJztXj2GPfk',
1563 'ext': 'mp4',
1564 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1565 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1566 'upload_date': '20090125',
1567 'uploader': 'Prochorowka',
1568 'uploader_id': 'Prochorowka',
1569 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1570 'artist': 'Panjabi MC',
1571 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1572 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1573 },
1574 'params': {
1575 'skip_download': True,
1576 },
545cc85d 1577 'skip': 'Video unavailable',
ea74e00b
DP
1578 },
1579 {
1580 # empty description results in an empty string
1581 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1582 'info_dict': {
1583 'id': 'x41yOUIvK2k',
1584 'ext': 'mp4',
1585 'title': 'IMG 3456',
1586 'description': '',
1587 'upload_date': '20170613',
1588 'uploader_id': 'ElevageOrVert',
1589 'uploader': 'ElevageOrVert',
1590 },
1591 'params': {
1592 'skip_download': True,
1593 },
1594 },
a0566bbf 1595 {
29f7c58a 1596 # with '};' inside yt initial data (see [1])
1597 # see [2] for an example with '};' inside ytInitialPlayerResponse
1598 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1599 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1600 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1601 'info_dict': {
1602 'id': 'CHqg6qOn4no',
1603 'ext': 'mp4',
1604 'title': 'Part 77 Sort a list of simple types in c#',
1605 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1606 'upload_date': '20130831',
1607 'uploader_id': 'kudvenkat',
1608 'uploader': 'kudvenkat',
1609 },
1610 'params': {
1611 'skip_download': True,
1612 },
1613 },
29f7c58a 1614 {
1615 # another example of '};' in ytInitialData
1616 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1617 'only_matching': True,
1618 },
1619 {
1620 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1621 'only_matching': True,
1622 },
545cc85d 1623 {
cc2db878 1624 # https://github.com/ytdl-org/youtube-dl/pull/28094
1625 'url': 'OtqTfy26tG0',
1626 'info_dict': {
1627 'id': 'OtqTfy26tG0',
1628 'ext': 'mp4',
1629 'title': 'Burn Out',
1630 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1631 'upload_date': '20141120',
1632 'uploader': 'The Cinematic Orchestra - Topic',
1633 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1634 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1635 'artist': 'The Cinematic Orchestra',
1636 'track': 'Burn Out',
1637 'album': 'Every Day',
1638 'release_data': None,
1639 'release_year': None,
1640 },
1641 'params': {
1642 'skip_download': True,
1643 },
545cc85d 1644 },
bc2ca1bb 1645 {
1646 # controversial video, only works with bpctr when authenticated with cookies
1647 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1648 'only_matching': True,
1649 },
a1a7907b 1650 {
1651 # controversial video, requires bpctr/contentCheckOk
1652 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1653 'info_dict': {
1654 'id': 'SZJvDhaSDnc',
1655 'ext': 'mp4',
1656 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1657 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1658 'uploader': 'CBS This Morning',
11f9be09 1659 'uploader_id': 'CBSThisMorning',
a1a7907b 1660 'upload_date': '20140716',
1661 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1662 }
1663 },
f7ad7160 1664 {
1665 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1666 'url': 'cBvYw8_A0vQ',
1667 'info_dict': {
1668 'id': 'cBvYw8_A0vQ',
1669 'ext': 'mp4',
1670 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1671 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1672 'upload_date': '20201120',
1673 'uploader': 'Walk around Japan',
1674 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1675 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1676 },
1677 'params': {
1678 'skip_download': True,
1679 },
0fb983f6 1680 }, {
1681 # Has multiple audio streams
1682 'url': 'WaOKSUlf4TM',
1683 'only_matching': True
9297939e 1684 }, {
1685 # Requires Premium: has format 141 when requested using YTM url
1686 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1687 'only_matching': True
1688 }, {
120916da 1689 # multiple subtitles with same lang_code
1690 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1691 'only_matching': True,
109dd3b2 1692 }, {
1693 # Force use android client fallback
1694 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1695 'info_dict': {
1696 'id': 'YOelRv7fMxY',
11f9be09 1697 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1698 'ext': '3gp',
1699 'upload_date': '20210624',
1700 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1701 'uploader': 'colinfurze',
11f9be09 1702 'uploader_id': 'colinfurze',
109dd3b2 1703 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1704 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1705 },
1706 'params': {
1707 'format': '17', # 3gp format available on android
1708 'extractor_args': {'youtube': {'player_client': ['android']}},
1709 },
120916da 1710 },
109dd3b2 1711 {
1712 # Skip download of additional client configs (remix client config in this case)
1713 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1714 'only_matching': True,
1715 'params': {
1716 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1717 },
8fc54b12 1718 }, {
1719 # shorts
1720 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1721 'only_matching': True,
9222c381 1722 }, {
1723 'note': 'Storyboards',
1724 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1725 'info_dict': {
1726 'id': '5KLPxDtMqe8',
1727 'ext': 'mhtml',
1728 'format_id': 'sb0',
1729 'title': 'Your Brain is Plastic',
1730 'uploader_id': 'scishow',
1731 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1732 'upload_date': '20140324',
1733 'uploader': 'SciShow',
1734 }, 'params': {'format': 'mhtml', 'skip_download': True}
1735 }
2eb88d95
PH
1736 ]
1737
@classmethod
def suitable(cls, url):
    """Reject URLs whose query string has a non-empty ``list`` value; otherwise defer to the parent class."""
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    has_list_id = bool(list_values[0])
    return False if has_list_id else super(YoutubeIE, cls).suitable(url)
1746
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent class, then create the two empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Both caches start out empty; they are filled lazily by the player/signature helpers
    self._code_cache, self._player_cache = {}, {}
e0df6211 1751
adbc4ec4
THD
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """
    Convert every format flagged 'is_from_start' into a generator-backed DASH format.

    Each such format dict is modified in place: 'protocol' becomes
    'http_dash_segments_generator' and 'fragments' becomes a partial of
    self._live_dash_fragments bound to the format id, live_start_time and a
    manifest feed callback. Nothing is returned.
    """
    EXPIRATION_DURATION = 18_000
    manifest_lock = threading.Lock()

    # State shared with (and rebound by) the closures below
    is_live = True
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]
    expiration_time = time.time() + EXPIRATION_DURATION

    def refetch_manifest(format_id):
        # format_id is accepted for symmetry with mpd_feed but is not used here
        nonlocal formats, expiration_time, is_live
        if time.time() <= expiration_time:
            return

        _, _, player_responses, player_url = self._download_player_responses(
            url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, is_live, _, formats = self._list_formats(
            video_id, microformats, video_details, player_responses, player_url)
        expiration_time = time.time() + EXPIRATION_DURATION

    def mpd_feed(format_id):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        # Only the refresh itself is done under the lock
        with manifest_lock:
            refetch_manifest(format_id)

        for candidate in formats:
            if candidate['format_id'] == format_id:
                return candidate['manifest_url'], candidate['manifest_stream_number'], is_live
        self.report_warning(
            f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        return None

    for fmt in formats:
        fmt['protocol'] = 'http_dash_segments_generator'
        fmt['fragments'] = functools.partial(
            self._live_dash_fragments, fmt['format_id'], live_start_time, mpd_feed)
1792
def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """
    Generate fragment dicts ({'url': ...}) for a live DASH format, polling while the stream is live.

    @param format_id        passed to mpd_feed on every manifest lookup
    @param live_start_time  start time of the stream (falsy if unknown); compared with the download start
    @param mpd_feed         callable returning (manifest_url, manifest_stream_number, is_live) or None
    @param ctx              mapping; only ctx.get('start') is read here
    """
    # FETCH_SPAN: minimum seconds between polling rounds; MAX_DURATION: 432000s == 120 hours
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # _extract_sequence_from_mpd hands this value back when it cannot produce a new one,
    # so it has to be bound before the first call (it used to raise NameError otherwise)
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence):
        """
        @returns (should_retry, last_seq): should_retry is False when no formats could be read from the manifest
        """
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        mpd_url, stream_number, is_live = mpd_feed(format_id) or (mpd_url, stream_number, False)
        if old_mpd_url == mpd_url and not refresh_sequence:
            return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 1
            return False, last_seq
        fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        # Give up once too many rounds in a row produced nothing
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 1
                last_segment_url = None
                continue
        else:
            should_retry, last_seq = _extract_sequence_from_mpd(True)
            if not should_retry:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        # From here on last_seq is used as the exclusive upper bound of range()
        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_retry, _ = _extract_sequence_from_mpd(False)
                if not should_retry:
                    # retry when it gets weird state
                    known_idx = idx - 1
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        # Wait out the remainder of FETCH_SPAN before the next round
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
1887
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Return the first player JS URL found in the given ytcfgs as an absolute URL, or None.

    Protocol-relative URLs get an 'https:' prefix; other non-http(s) URLs are
    joined onto 'https://www.youtube.com'. The webpage argument is not used.
    """
    found = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if not found:
        return None
    if found.startswith('//'):
        return 'https:' + found
    if re.match(r'https?://', found):
        return found
    return compat_urlparse.urljoin('https://www.youtube.com', found)
1900
def _download_player_url(self, video_id, fatal=False):
    """
    Build a player base.js URL from the player version found in the iframe API script.

    Returns None when the download yields nothing or no version is found.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None
    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1910
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # The representation is the length of every dot-separated part, joined by dots
    part_lengths = [compat_str(len(segment)) for segment in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1914
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the 'id' group of the first pattern in cls._PLAYER_INFO_RE that matches player_url.

    Raises ExtractorError when none of the patterns match.
    """
    for pattern in cls._PLAYER_INFO_RE:
        found = re.search(pattern, player_url)
        if found:
            return found.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c 1924
def _load_player(self, video_id, player_url, fatal=True):
    """
    Return the player code for player_url, downloading it unless its player id is already in self._code_cache.

    A successful (truthy) download is cached; otherwise None is returned.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    downloaded = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if downloaded:
        self._code_cache[player_id] = downloaded
    return self._code_cache.get(player_id)
109dd3b2 1935
def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a callable that transforms a signature string shaped like example_sig.

    A cached index list from the 'youtube-sigfuncs' cache is used when present;
    otherwise the function is parsed from the player code and its index list is
    stored in that cache. Returns None when no player code could be loaded.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cache = self._downloader.cache
    cache_spec = cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        return lambda s: ''.join(s[i] for i in cache_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        return None

    sig_func = self._parse_sig_js(code)

    # Run the function over a string of distinct characters to record where each one ends up
    probe = ''.join(compat_chr(n) for n in range(len(example_sig)))
    index_list = [ord(ch) for ch in sig_func(probe)]

    cache.store('youtube-sigfuncs', func_id, index_list)
    return sig_func
83799698 1958
def _print_sig_code(self, func, example_sig):
    """
    Print Python code equivalent to the signature function func, if 'youtube_print_sig_code' is set.

    func is run over a probe string as long as example_sig; the resulting
    character order is rendered as a sum of index and slice expressions on `s`.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        def _genslice(first, last, stride):
            first_part = '' if first == 0 else str(first)
            if last + stride >= 0:
                last_part = ':%d' % (last + stride)
            else:
                last_part = ':'
            stride_part = '' if stride == 1 else (':%d' % stride)
            return 's[%s%s%s]' % (first_part, last_part, stride_part)

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            delta = i - prev
            if step is not None:
                # Inside a run: keep going while the stride holds, otherwise emit the run
                if delta != step:
                    yield _genslice(start, prev, step)
                    step = None
                continue
            if delta in [-1, 1]:
                # A new run starts at prev
                step = delta
                start = prev
                continue
            yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_list = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_list))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % (part_lengths)
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2000
e0df6211
PH
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and return a Python callable for it.

    The returned callable takes the signature string and passes it to the
    interpreted JS function as its only argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        # The interpreted function takes its arguments as a list
        return initial_function([s])

    return run_signature_function
2024
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # One extracted function per (player URL, signature shape) pair
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
2043
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')

    # Make the player URL absolute
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin('https://www.youtube.com', player_url)

    # Already decrypted values are served straight from the cache
    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        decrypted = self._player_cache[func_key](s)
        self._player_cache[value_key] = decrypted
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
404f611f 2068
def _extract_n_function_name(self, jscode):
    """Return the 'nfunc' group matched in the player JS by the n-function name pattern."""
    name_patterns = (
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',
    )
    return self._search_regex(
        name_patterns, jscode, 'Initial JS player n function name', group='nfunc')
2073
def _extract_n_function(self, video_id, player_url):
    """
    Return a callable wrapping the player's n function.

    The function code is read from the 'youtube-nsig' cache when available;
    otherwise it is extracted from the player JS and stored in that cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        # The function is rebuilt from its code on every call
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function
e0df6211 2091
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' entry of ytcfg is preferred; without it the value is searched for in the player code.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2115
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback tracking URL found in player_responses, with 'ver' and a random 'cpn' set.

    Only a warning is reported when no such URL is present; the request itself is non-fatal.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]
    cpn = ''.join(cpn_chars)

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2141
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """
    Collect YouTube embeds found in webpage.

    Returns a list holding, in this order: embedded player URLs, the values of
    lazyYT 'data-youtube-id' attributes, and the 'data-video_id' values of
    'yvii_single_video_player' divs.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read `embedSWF\(?:\s*`, which requires a
    # literal ':' after an optional '(' and so could not match `embedSWF("...")`
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields one tuple per match; the video id is its last group
    entries.extend(m[-1] for m in matches)

    return entries
2173
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by YoutubeIE._extract_urls, or None when there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
2178
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the 'id' group of cls._VALID_URL matched against url; raise ExtractorError if it does not match."""
    found = re.match(cls._VALID_URL, url, re.VERBOSE)
    if found is not None:
        return found.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af 2185
def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar found in data's player overlay."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        # timeRangeStartMillis is scaled down by 1000
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)
2200
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the first non-empty chapter list built from the macro marker lists in data's engagement panels, else []."""
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    # Candidates are evaluated one at a time; the first truthy result wins
    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2216
def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
    """
    Build a list of {'start_time', 'title', 'end_time'} dicts from chapter_list.

    chapter_time and chapter_title are callables applied to each item. Items
    without a start time are skipped. Each chapter ends where the next one
    starts; the last one ends at duration.
    """
    result = []
    previous = {'start_time': 0}
    for position, item in enumerate(chapter_list or []):
        title = chapter_title(item)
        start = chapter_time(item)
        if start is None:
            continue
        previous['end_time'] = start
        if start < previous['start_time']:
            if position != 1:
                self.report_warning(f'Invalid start time for chapter "{title}"')
                continue
            # For the second item the previously added chapter is dropped instead
            result.pop()
            self.report_warning('Invalid start time for chapter "%s"' % previous['title'])
        previous = {'start_time': start, 'title': title}
        result.append(previous)
    previous['end_time'] = duration
    return result
2237
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """
    Search webpage for regex and parse the match as JSON (non-fatal).

    regex followed by self._YT_INITIAL_BOUNDARY_RE is tried first, then regex
    alone; '{}' is parsed when neither matches.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    json_text = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(json_text, video_id, fatal=False)
84213ea8 2242
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """
    Build a comment dict from comment_renderer.

    Returns None when the renderer has no 'commentId'. 'parent' defaults to 'root'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    # voteCount is tried before likeCount; a missing count becomes 0
    vote_getters = (
        lambda x: x['voteCount']['simpleText'],
        lambda x: x['likeCount'],
    )
    votes = parse_count(try_get(comment_renderer, vote_getters, compat_str)) or 0
    author_thumbnail = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    is_favorited = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2277
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comments by following continuations, one API page at a time.

    This is a generator. It calls itself recursively (with `parent` set to
    the id of the comment being replied to) to walk reply threads.

    @param root_continuation_data  Renderer from which the first continuation
                                   is extracted
    @param ytcfg                   Passed to generate_api_headers and
                                   _extract_response
    @param video_id                Used when reporting warnings
    @param parent                  Id of the parent comment; None for the
                                   top-level comment section
    @param tracker                 Dict of counters shared across recursive
                                   calls; created here when not given
    """

    # First value of an extractor argument, or '' when it is not set
    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Reads the comments header: records the estimated comment count and
        # returns the continuation for the requested sort order (or None)
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yields each comment found in `contents`, followed by its replies.
        # Updates the shared tracker counters as it goes
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # Bare yield produces None, which the consuming loop below
                # treats as the signal to stop extraction
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies by both the per-thread limit and what is left of the global reply limit
                for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))):
                    yield reply_comment

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # The list is padded with '' so that four values always unpack; values
    # that do not parse as integers become sys.maxsize.
    # NOTE: max_comments is unpacked here but not used in this function
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    response = None
    # Only the top-level call has a header to process on its first response
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints')

        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        # Reset; the outer loop ends unless this page provides a new continuation
        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # None sentinel from extract_thread: parent limit reached
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break
a1c5d2ca 2414
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Return a lazy iterator over the comments of the video.

    The iterator is truncated to the first value of the `max_comments`
    extractor argument; when that value is not an integer, no limit applies.
    """
    def _iter_comments(sections):
        # Locate the first item section flagged as the comment section
        comment_section = None
        for section in traverse_obj(sections, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_iter_comments(contents), 0, limit)
a1c5d2ca 2425
109dd3b2 2426 @staticmethod
99e9e001 2427 def _get_checkok_params():
2428 return {'contentCheckOk': True, 'racyCheckOk': True}
2429
@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player request.

    @param sts  Signature timestamp; only included when it is not None
    @returns    Dict with 'playbackContext' plus the check-ok parameters
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback_context,
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context
2443
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """Tell whether a player response indicates an age-gated video.

    Returns True when the playability status carries a
    'desktopLegacyAgeGateReason', or when its 'status' or 'reason' contains
    one of the known age-gate markers. Matching is case-insensitive.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lowercase while status values are compared in
    # uppercase elsewhere in this file (e.g. 'UNPLAYABLE'), so a
    # case-sensitive test would never match a status. Normalise first.
    # Non-string values are skipped rather than raising TypeError.
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)
2455
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the response is exactly 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 2459
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the 'player' endpoint for the given client.

    The signature timestamp is only looked up when a player URL is given.
    @returns  The response, or None when the response is falsy
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid,
        session_index=session_index, default_client=client)

    yt_query = dict({'videoId': video_id}, **self._generate_player_context(sts))
    client_label = client.replace('_', ' ').strip()

    player_response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return player_response or None
2476
def _get_requested_clients(self, url, smuggled_data):
    """Resolve the `player_client` extractor argument into a list of clients.

    Each requested value is either a known client name (names starting with
    '_' are not accepted), 'default', or 'all' (every accepted client,
    highest 'priority' first). Unknown values are skipped with a warning.
    When nothing valid was requested, the default clients are used. For
    music URLs, the `<client>_music` variant of each selected client is
    added when such a client exists.

    @returns  The selected clients, de-duplicated via orderedSet
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Compute the additions first: extending a list from a generator
        # that iterates over that same list walks the newly added items too
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 2500
c0bc527b
M
2501 def _extract_player_ytcfg(self, client, video_id):
2502 url = {
2503 'web_music': 'https://music.youtube.com',
2504 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2505 }.get(client)
2506 if not url:
2507 return {}
2508 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2509 return self.extract_ytcfg(video_id, webpage) or {}
2510
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect the player responses of every requested client.

    Clients are tried in the order given. Extra clients ('*_agegate',
    '*_creator') are queued while iterating when a response looks age-gated.

    @returns  (list of player responses, player URL or None)
    @raises   The last ExtractorError when no response could be obtained
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    original_clients = clients
    # Reversed copy used as a stack: pop() then yields clients in the
    # requested order, and a client appended later is tried next
    clients = clients[::-1]
    prs = []

    def append_client(client_name):
        # Queue a follow-up client, unless it is unknown or was requested by the caller
        if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
            clients.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client = clients.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        # A player URL found for one client is reused for the following ones
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback download is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # The response embedded in the webpage is reused for the web client
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones are downgraded to warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if last_error:
        # Fatal only when nothing at all was collected
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 2577
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Generate format dicts from the streaming data of all player responses.

    First yields the direct (https) formats listed under 'formats' and
    'adaptiveFormats', then the formats found in the HLS and DASH manifests
    of each streaming data entry. A manifest format whose itag was already
    seen with the same protocol is dropped; one seen with another protocol
    gets the format id `<itag>-<proto>`.

    @param streaming_data  List of 'streamingData' dicts
    @param player_url      Player JS URL; formats needing signature
                           decryption are skipped without it
    @param is_live         Whether the video is currently live
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null 'audioQuality'
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # quality is None when the format declares no quality at all
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Copied so that appending below cannot modify a list owned by the caller
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        # Sets format_id and quality of a manifest format in place.
        # Returns False when the format duplicates one already seen
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Quality is looked up by itag first, then by height; -1 when neither is known
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
11f9be09 2730
def _extract_storyboard(self, player_responses, duration):
    """Generate one 'mhtml' storyboard format per entry of the storyboard spec.

    The spec is a '|'-separated string: the first field is the URL template
    and each following field is a '#'-separated description of one
    storyboard. Malformed descriptions are reported and skipped.

    @param duration  Video duration, used to compute fragment durations.
                     Nothing is generated when it is None
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # After the reversal, the URL template is the last element
    base_url = spec.pop()
    # str.split never returns an empty list, so a missing spec shows up as
    # an empty template. A None duration cannot be divided below
    if not base_url or duration is None:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is a grid of cols x rows frames; the last one may be partial
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'path': url.replace('$M', str(j)),
                # The final fragment only covers what is left of the duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
2765
adbc4ec4 2766 def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
b6de707d 2767 webpage = None
2768 if 'webpage' not in self._configuration_arg('player_skip'):
2769 webpage = self._download_webpage(
2770 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
11f9be09 2771
2772 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
11f9be09 2773
b6de707d 2774 player_responses, player_url = self._extract_player_responses(
11f9be09 2775 self._get_requested_clients(url, smuggled_data),
99e9e001 2776 video_id, webpage, master_ytcfg)
11f9be09 2777
adbc4ec4
THD
2778 return webpage, master_ytcfg, player_responses, player_url
2779
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
    """Work out the live state of the video and extract all of its formats.

    @returns  (live_broadcast_details, is_live, streaming_data, formats)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    # The video details take precedence; the broadcast details are the fallback
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = []
    formats.extend(self._extract_formats(streaming_data, video_id, player_url, is_live))

    return live_broadcast_details, is_live, streaming_data, formats
2790
2791 def _real_extract(self, url):
2792 url, smuggled_data = unsmuggle_url(url, {})
2793 video_id = self._match_id(url)
2794
2795 base_url = self.http_scheme() + '//www.youtube.com/'
2796 webpage_url = base_url + 'watch?v=' + video_id
2797
2798 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2799
11f9be09 2800 playability_statuses = traverse_obj(
2801 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2802
2803 trailer_video_id = get_first(
2804 playability_statuses,
2805 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2806 expected_type=str)
2807 if trailer_video_id:
2808 return self.url_result(
2809 trailer_video_id, self.ie_key(), trailer_video_id)
2810
2811 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2812 if webpage else (lambda x: None))
2813
2814 video_details = traverse_obj(
2815 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2816 microformats = traverse_obj(
2817 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2818 expected_type=dict, default=[])
2819 video_title = (
2820 get_first(video_details, 'title')
2821 or self._get_text(microformats, (..., 'title'))
2822 or search_meta(['og:title', 'twitter:title', 'title']))
2823 video_description = get_first(video_details, 'shortDescription')
2824
d89257f3 2825 multifeed_metadata_list = get_first(
2826 player_responses,
2827 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2828 expected_type=str)
2829 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
2830 if self.get_param('noplaylist'):
11f9be09 2831 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 2832 else:
2833 entries = []
2834 feed_ids = []
2835 for feed in multifeed_metadata_list.split(','):
2836 # Unquote should take place before split on comma (,) since textual
2837 # fields may contain comma as well (see
2838 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2839 feed_data = compat_parse_qs(
2840 compat_urllib_parse_unquote_plus(feed))
2841
2842 def feed_entry(name):
2843 return try_get(
2844 feed_data, lambda x: x[name][0], compat_str)
2845
2846 feed_id = feed_entry('id')
2847 if not feed_id:
2848 continue
2849 feed_title = feed_entry('title')
2850 title = video_title
2851 if feed_title:
2852 title += ' (%s)' % feed_title
2853 entries.append({
2854 '_type': 'url_transparent',
2855 'ie_key': 'Youtube',
2856 'url': smuggle_url(
2857 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2858 {'force_singlefeed': True}),
2859 'title': title,
2860 })
2861 feed_ids.append(feed_id)
2862 self.to_screen(
2863 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2864 % (', '.join(feed_ids), video_id))
2865 return self.playlist_result(
2866 entries, video_id, video_title, video_description)
11f9be09 2867
adbc4ec4 2868 live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)
bf1317d2 2869
545cc85d 2870 if not formats:
11f9be09 2871 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 2872 self.report_drm(video_id)
11f9be09 2873 pemr = get_first(
2874 playability_statuses,
2875 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2876 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2877 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 2878 if subreason:
545cc85d 2879 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 2880 countries = get_first(microformats, 'availableCountries')
545cc85d 2881 if not countries:
2882 regions_allowed = search_meta('regionsAllowed')
2883 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2884 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 2885 reason += f'. {subreason}'
545cc85d 2886 if reason:
b7da73eb 2887 self.raise_no_formats(reason, expected=True)
bf1317d2 2888
11f9be09 2889 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 2890 if not keywords and webpage:
2891 keywords = [
2892 unescapeHTML(m.group('content'))
2893 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2894 for keyword in keywords:
2895 if keyword.startswith('yt:stretch='):
201c1459 2896 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2897 if mobj:
2898 # NB: float is intentional for forcing float division
2899 w, h = (float(v) for v in mobj.groups())
2900 if w > 0 and h > 0:
2901 ratio = w / h
2902 for f in formats:
2903 if f.get('vcodec') != 'none':
2904 f['stretched_ratio'] = ratio
2905 break
6449cd80 2906
545cc85d 2907 thumbnails = []
11f9be09 2908 thumbnail_dicts = traverse_obj(
2909 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2910 expected_type=dict, default=[])
2911 for thumbnail in thumbnail_dicts:
2912 thumbnail_url = thumbnail.get('url')
2913 if not thumbnail_url:
2914 continue
2915 # Sometimes youtube gives a wrong thumbnail URL. See:
2916 # https://github.com/yt-dlp/yt-dlp/issues/233
2917 # https://github.com/ytdl-org/youtube-dl/issues/28023
2918 if 'maxresdefault' in thumbnail_url:
2919 thumbnail_url = thumbnail_url.split('?')[0]
2920 thumbnails.append({
2921 'url': thumbnail_url,
2922 'height': int_or_none(thumbnail.get('height')),
2923 'width': int_or_none(thumbnail.get('width')),
2924 })
ff2751ac 2925 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2926 if thumbnail_url:
2927 thumbnails.append({
2928 'url': thumbnail_url,
ff2751ac 2929 })
fccf5021 2930 original_thumbnails = thumbnails.copy()
2931
0ba692ac 2932 # The best resolution thumbnails sometimes does not appear in the webpage
2933 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 2934 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 2935 thumbnail_names = [
2936 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
cca80fe6 2937 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2938 'mqdefault', 'mq1', 'mq2', 'mq3',
2939 'default', '1', '2', '3'
2940 ]
cca80fe6 2941 n_thumbnail_names = len(thumbnail_names)
0ba692ac 2942 thumbnails.extend({
2943 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2944 video_id=video_id, name=name, ext=ext,
2945 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 2946 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 2947 for thumb in thumbnails:
cca80fe6 2948 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 2949 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 2950 self._remove_duplicate_formats(thumbnails)
fccf5021 2951 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 2952
7ea65411 2953 category = get_first(microformats, 'category') or search_meta('genre')
2954 channel_id = str_or_none(
2955 get_first(video_details, 'channelId')
2956 or get_first(microformats, 'externalChannelId')
2957 or search_meta('channelId'))
2958 duration = int_or_none(
2959 get_first(video_details, 'lengthSeconds')
2960 or get_first(microformats, 'lengthSeconds')
2961 or parse_duration(search_meta('duration'))) or None
2962 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2963
2964 live_content = get_first(video_details, 'isLiveContent')
2965 is_upcoming = get_first(video_details, 'isUpcoming')
2966 if is_live is None:
2967 if is_upcoming or live_content is False:
2968 is_live = False
2969 if is_upcoming is None and (live_content or is_live):
2970 is_upcoming = False
adbc4ec4
THD
2971 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2972 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2973 if not duration and live_end_time and live_start_time:
2974 duration = live_end_time - live_start_time
2975
2976 if is_live and self.get_param('live_from_start'):
2977 self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
7ea65411 2978
720c3099 2979 formats.extend(self._extract_storyboard(player_responses, duration))
2980
2981 # Source is given priority since formats that throttle are given lower source_preference
2982 # When throttling issue is fully fixed, remove this
2983 self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
2984
545cc85d 2985 info = {
2986 'id': video_id,
39ca3b5c 2987 'title': video_title,
545cc85d 2988 'formats': formats,
2989 'thumbnails': thumbnails,
fccf5021 2990 # The best thumbnail that we are sure exists. Prevents unnecessary
2991 # URL checking if user don't care about getting the best possible thumbnail
2992 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 2993 'description': video_description,
2994 'upload_date': unified_strdate(
11f9be09 2995 get_first(microformats, 'uploadDate')
545cc85d 2996 or search_meta('uploadDate')),
11f9be09 2997 'uploader': get_first(video_details, 'author'),
545cc85d 2998 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2999 'uploader_url': owner_profile_url,
3000 'channel_id': channel_id,
11f9be09 3001 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
545cc85d 3002 'duration': duration,
3003 'view_count': int_or_none(
11f9be09 3004 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 3005 or search_meta('interactionCount')),
11f9be09 3006 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 3007 'age_limit': 18 if (
11f9be09 3008 get_first(microformats, 'isFamilySafe') is False
545cc85d 3009 or search_meta('isFamilyFriendly') == 'false'
3010 or search_meta('og:restrictions:age') == '18+') else 0,
3011 'webpage_url': webpage_url,
3012 'categories': [category] if category else None,
3013 'tags': keywords,
11f9be09 3014 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 3015 'is_live': is_live,
3016 'was_live': (False if is_live or is_upcoming or live_content is False
3017 else None if is_live is None or is_upcoming is None
3018 else live_content),
3019 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
adbc4ec4 3020 'release_timestamp': live_start_time,
545cc85d 3021 }
b477fc13 3022
3944e7af 3023 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 3024 if pctr:
ecdc9049 3025 def get_lang_code(track):
3026 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3027 or track.get('languageCode'))
3028
3029 # Converted into dicts to remove duplicates
3030 captions = {
3031 get_lang_code(sub): sub
3032 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3033 translation_languages = {
3034 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3035 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3036
774d79cc 3037 def process_language(container, base_url, lang_code, sub_name, query):
120916da 3038 lang_subs = container.setdefault(lang_code, [])
545cc85d 3039 for fmt in self._SUBTITLE_FORMATS:
3040 query.update({
3041 'fmt': fmt,
3042 })
3043 lang_subs.append({
3044 'ext': fmt,
3045 'url': update_url_query(base_url, query),
774d79cc 3046 'name': sub_name,
545cc85d 3047 })
7e72694b 3048
ecdc9049 3049 subtitles, automatic_captions = {}, {}
3050 for lang_code, caption_track in captions.items():
3051 base_url = caption_track.get('baseUrl')
545cc85d 3052 if not base_url:
3053 continue
ecdc9049 3054 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 3055 if caption_track.get('kind') != 'asr':
545cc85d 3056 if not lang_code:
3057 continue
3058 process_language(
ecdc9049 3059 subtitles, base_url, lang_code, lang_name, {})
3060 if not caption_track.get('isTranslatable'):
3061 continue
3944e7af 3062 for trans_code, trans_name in translation_languages.items():
3063 if not trans_code:
545cc85d 3064 continue
ecdc9049 3065 if caption_track.get('kind') != 'asr':
3066 trans_code += f'-{lang_code}'
3067 trans_name += format_field(lang_name, template=' from %s')
545cc85d 3068 process_language(
ecdc9049 3069 automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
3070 info['automatic_captions'] = automatic_captions
3071 info['subtitles'] = subtitles
7e72694b 3072
545cc85d 3073 parsed_url = compat_urllib_parse_urlparse(url)
3074 for component in [parsed_url.fragment, parsed_url.query]:
3075 query = compat_parse_qs(component)
3076 for k, v in query.items():
3077 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3078 d_k += '_time'
3079 if d_k not in info and k in s_ks:
3080 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
3081
3082 # Youtube Music Auto-generated description
822b9d9c 3083 if video_description:
38d70284 3084 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 3085 if mobj:
822b9d9c
RA
3086 release_year = mobj.group('release_year')
3087 release_date = mobj.group('release_date')
3088 if release_date:
3089 release_date = release_date.replace('-', '')
3090 if not release_year:
545cc85d 3091 release_year = release_date[:4]
3092 info.update({
3093 'album': mobj.group('album'.strip()),
3094 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3095 'track': mobj.group('track').strip(),
3096 'release_date': release_date,
cc2db878 3097 'release_year': int_or_none(release_year),
545cc85d 3098 })
7e72694b 3099
545cc85d 3100 initial_data = None
3101 if webpage:
3102 initial_data = self._extract_yt_initial_variable(
3103 webpage, self._YT_INITIAL_DATA_RE, video_id,
3104 'yt initial data')
3105 if not initial_data:
99e9e001 3106 query = {'videoId': video_id}
3107 query.update(self._get_checkok_params())
109dd3b2 3108 initial_data = self._extract_response(
3109 item_id=video_id, ep='next', fatal=False,
99e9e001 3110 ytcfg=master_ytcfg, query=query,
3111 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 3112 note='Downloading initial data API JSON')
545cc85d 3113
c60ee3a2 3114 try:
3115 # This will error if there is no livechat
3116 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
ecdc9049 3117 info.setdefault('subtitles', {})['live_chat'] = [{
c60ee3a2 3118 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3119 'video_id': video_id,
3120 'ext': 'json',
f6745c49 3121 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 3122 }]
3123 except (KeyError, IndexError, TypeError):
3124 pass
545cc85d 3125
3126 if initial_data:
7c365c21 3127 info['chapters'] = (
3128 self._extract_chapters_from_json(initial_data, duration)
3129 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3130 or None)
545cc85d 3131
3132 contents = try_get(
3133 initial_data,
3134 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3135 list) or []
3136 for content in contents:
3137 vpir = content.get('videoPrimaryInfoRenderer')
3138 if vpir:
3139 stl = vpir.get('superTitleLink')
3140 if stl:
fe93e2c4 3141 stl = self._get_text(stl)
545cc85d 3142 if try_get(
3143 vpir,
3144 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3145 info['location'] = stl
3146 else:
3147 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3148 if mobj:
3149 info.update({
3150 'series': mobj.group(1),
3151 'season_number': int(mobj.group(2)),
3152 'episode_number': int(mobj.group(3)),
3153 })
3154 for tlb in (try_get(
3155 vpir,
3156 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3157 list) or []):
3158 tbr = tlb.get('toggleButtonRenderer') or {}
3159 for getter, regex in [(
3160 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3161 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3162 lambda x: x['accessibility'],
3163 lambda x: x['accessibilityData']['accessibilityData'],
3164 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3165 label = (try_get(tbr, getter, dict) or {}).get('label')
3166 if label:
3167 mobj = re.match(regex, label)
3168 if mobj:
3169 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3170 break
3171 sbr_tooltip = try_get(
3172 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3173 if sbr_tooltip:
3174 like_count, dislike_count = sbr_tooltip.split(' / ')
3175 info.update({
3176 'like_count': str_to_int(like_count),
3177 'dislike_count': str_to_int(dislike_count),
3178 })
3179 vsir = content.get('videoSecondaryInfoRenderer')
3180 if vsir:
052e1350 3181 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
545cc85d 3182 rows = try_get(
3183 vsir,
3184 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3185 list) or []
3186 multiple_songs = False
3187 for row in rows:
3188 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3189 multiple_songs = True
3190 break
3191 for row in rows:
3192 mrr = row.get('metadataRowRenderer') or {}
3193 mrr_title = mrr.get('title')
3194 if not mrr_title:
3195 continue
052e1350 3196 mrr_title = self._get_text(mrr, 'title')
3197 mrr_contents_text = self._get_text(mrr, ('contents', 0))
545cc85d 3198 if mrr_title == 'License':
3199 info['license'] = mrr_contents_text
3200 elif not multiple_songs:
3201 if mrr_title == 'Album':
3202 info['album'] = mrr_contents_text
3203 elif mrr_title == 'Artist':
3204 info['artist'] = mrr_contents_text
3205 elif mrr_title == 'Song':
3206 info['track'] = mrr_contents_text
3207
3208 fallbacks = {
3209 'channel': 'uploader',
3210 'channel_id': 'uploader_id',
3211 'channel_url': 'uploader_url',
3212 }
3213 for to, frm in fallbacks.items():
3214 if not info.get(to):
3215 info[to] = info.get(frm)
3216
3217 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3218 v = info.get(s_k)
3219 if v:
3220 info[d_k] = v
b84071c0 3221
11f9be09 3222 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3223 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3224 is_membersonly = None
b28f8d24 3225 is_premium = None
c224251a
M
3226 if initial_data and is_private is not None:
3227 is_membersonly = False
b28f8d24 3228 is_premium = False
47193e02 3229 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3230 badge_labels = set()
3231 for content in contents:
3232 if not isinstance(content, dict):
3233 continue
3234 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3235 for badge_label in badge_labels:
3236 if badge_label.lower() == 'members only':
3237 is_membersonly = True
3238 elif badge_label.lower() == 'premium':
3239 is_premium = True
3240 elif badge_label.lower() == 'unlisted':
3241 is_unlisted = True
c224251a 3242
c224251a
M
3243 info['availability'] = self._availability(
3244 is_private=is_private,
b28f8d24 3245 needs_premium=is_premium,
c224251a
M
3246 needs_subscription=is_membersonly,
3247 needs_auth=info['age_limit'] >= 18,
3248 is_unlisted=None if is_private is None else is_unlisted)
3249
a2160aa4 3250 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3251
11f9be09 3252 self.mark_watched(video_id, player_responses)
d77ab8e2 3253
545cc85d 3254 return info
c5e8d7af 3255
a61fd4cf 3256
a6213a49 3257class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
5f6a1245 3258
a6213a49 3259 def _extract_channel_id(self, webpage):
3260 channel_id = self._html_search_meta(
3261 'channelId', webpage, 'channel id', default=None)
3262 if channel_id:
3263 return channel_id
3264 channel_url = self._html_search_meta(
3265 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3266 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3267 'twitter:app:url:googleplay'), webpage, 'channel url')
3268 return self._search_regex(
3269 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3270 channel_url, 'channel id')
15f6397c 3271
8bdd16b4 3272 @staticmethod
cd7c66cf 3273 def _extract_basic_item_renderer(item):
3274 # Modified from _extract_grid_item_renderer
201c1459 3275 known_basic_renderers = (
3276 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3277 )
3278 for key, renderer in item.items():
201c1459 3279 if not isinstance(renderer, dict):
cd7c66cf 3280 continue
201c1459 3281 elif key in known_basic_renderers:
3282 return renderer
3283 elif key.startswith('grid') and key.endswith('Renderer'):
3284 return renderer
8bdd16b4 3285
8bdd16b4 3286 def _grid_entries(self, grid_renderer):
3287 for item in grid_renderer['items']:
3288 if not isinstance(item, dict):
39b62db1 3289 continue
cd7c66cf 3290 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3291 if not isinstance(renderer, dict):
3292 continue
052e1350 3293 title = self._get_text(renderer, 'title')
fe93e2c4 3294
8bdd16b4 3295 # playlist
3296 playlist_id = renderer.get('playlistId')
3297 if playlist_id:
3298 yield self.url_result(
3299 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3300 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3301 video_title=title)
201c1459 3302 continue
8bdd16b4 3303 # video
3304 video_id = renderer.get('videoId')
3305 if video_id:
3306 yield self._extract_video(renderer)
201c1459 3307 continue
8bdd16b4 3308 # channel
3309 channel_id = renderer.get('channelId')
3310 if channel_id:
8bdd16b4 3311 yield self.url_result(
3312 'https://www.youtube.com/channel/%s' % channel_id,
3313 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3314 continue
3315 # generic endpoint URL support
3316 ep_url = urljoin('https://www.youtube.com/', try_get(
3317 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3318 compat_str))
3319 if ep_url:
3320 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3321 if ie.suitable(ep_url):
3322 yield self.url_result(
3323 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3324 break
8bdd16b4 3325
3d3dddc9 3326 def _shelf_entries_from_content(self, shelf_renderer):
3327 content = shelf_renderer.get('content')
3328 if not isinstance(content, dict):
8bdd16b4 3329 return
cd7c66cf 3330 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3331 if renderer:
3332 # TODO: add support for nested playlists so each shelf is processed
3333 # as separate playlist
3334 # TODO: this includes only first N items
3335 for entry in self._grid_entries(renderer):
3336 yield entry
3337 renderer = content.get('horizontalListRenderer')
3338 if renderer:
3339 # TODO
3340 pass
8bdd16b4 3341
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries of a shelf.

    When the shelf exposes an endpoint URL, a URL result for it is yielded
    first. Entries embedded in the shelf content are yielded afterwards.

    @param skip_channels: when true, a shelf whose URL contains
                          ``/channels?`` produces no entries at all
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))
    if shelf_url:
        # Skipping links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 3358
8bdd16b4 3359 def _playlist_entries(self, video_list_renderer):
3360 for content in video_list_renderer['contents']:
3361 if not isinstance(content, dict):
3362 continue
3363 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3364 if not isinstance(renderer, dict):
3365 continue
3366 video_id = renderer.get('videoId')
3367 if not video_id:
3368 continue
3369 yield self._extract_video(renderer)
07aeced6 3370
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held at ``content.videoRenderer``, if it has a ``videoId``."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3378
8bdd16b4 3379 def _video_entry(self, video_renderer):
3380 video_id = video_renderer.get('videoId')
3381 if video_id:
3382 return self._extract_video(video_renderer)
dacb3a86 3383
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, then every video
    linked inline in the post text (excluding a link to the attached video).
    Nothing is yielded when the post has no ``backstagePostRenderer``.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = video_renderer.get('videoId')
    if video_id:
        attached_video = self._extract_video(video_renderer)
        if attached_video:
            yield attached_video

    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video has already been handled above
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3419
8bdd16b4 3420 def _post_thread_continuation_entries(self, post_thread_continuation):
3421 contents = post_thread_continuation.get('contents')
3422 if not isinstance(contents, list):
3423 return
3424 for content in contents:
3425 renderer = content.get('backstagePostThreadRenderer')
3426 if not isinstance(renderer, dict):
3427 continue
3428 for entry in self._post_thread_entries(renderer):
3429 yield entry
07aeced6 3430
39ed931e 3431 r''' # unused
3432 def _rich_grid_entries(self, contents):
3433 for content in contents:
3434 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3435 if video_renderer:
3436 entry = self._video_entry(video_renderer)
3437 if entry:
3438 yield entry
3439 '''
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in ``parent_renderer['contents']``.

    @param parent_renderer:   renderer dict whose ``contents`` list is walked
    @param continuation_list: single-element list used as an out-parameter;
                              it is reset to ``[None]`` and element 0 is then
                              overwritten with whatever ``_extract_continuation``
                              returns while the contents are processed
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
        if not is_renderer:
            # Not an item section: the only other content handled here is richItemRenderer
            renderer = content.get('richItemRenderer')
            if renderer:
                for entry in self._rich_entries(renderer):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue
        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Maps a renderer key to a callable returning an iterable of entries
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'shelfRenderer': lambda x: self._shelf_entries(x),
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
            }
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    # Falsy entries (e.g. None from _video_entry) are dropped
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first known renderer of each item is processed
                break

        # Fall back to the item section itself ...
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # ... and finally to the parent renderer
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
3483
3484 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
3485 continuation_list = [None]
3486 extract_entries = lambda x: self._extract_entries(x, continuation_list)
29f7c58a 3487 tab_content = try_get(tab, lambda x: x['content'], dict)
3488 if not tab_content:
3489 return
3462ffa8 3490 parent_renderer = (
29f7c58a 3491 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3492 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
70d5c17b 3493 for entry in extract_entries(parent_renderer):
3494 yield entry
3462ffa8 3495 continuation = continuation_list[0]
d069eca7 3496
8bdd16b4 3497 for page_num in itertools.count(1):
3498 if not continuation:
3499 break
99e9e001 3500 headers = self.generate_api_headers(
3501 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
79360d99 3502 response = self._extract_response(
3503 item_id='%s page %s' % (item_id, page_num),
fe93e2c4 3504 query=continuation, headers=headers, ytcfg=ytcfg,
79360d99 3505 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
a5c56234
M
3506
3507 if not response:
8bdd16b4 3508 break
ac56cf38 3509 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
3510 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
3511 visitor_data = self._extract_visitor_data(response) or visitor_data
ebf1b291 3512
69184e41 3513 known_continuation_renderers = {
3514 'playlistVideoListContinuation': self._playlist_entries,
3515 'gridContinuation': self._grid_entries,
3516 'itemSectionContinuation': self._post_thread_continuation_entries,
3517 'sectionListContinuation': extract_entries, # for feeds
3518 }
8bdd16b4 3519 continuation_contents = try_get(
69184e41 3520 response, lambda x: x['continuationContents'], dict) or {}
3521 continuation_renderer = None
3522 for key, value in continuation_contents.items():
3523 if key not in known_continuation_renderers:
3462ffa8 3524 continue
69184e41 3525 continuation_renderer = value
3526 continuation_list = [None]
3527 for entry in known_continuation_renderers[key](continuation_renderer):
3528 yield entry
3529 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3530 break
3531 if continuation_renderer:
3532 continue
c5e8d7af 3533
a1b535bd 3534 known_renderers = {
3535 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3536 'gridVideoRenderer': (self._grid_entries, 'items'),
d61fc646 3537 'gridChannelRenderer': (self._grid_entries, 'items'),
a1b535bd 3538 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
cd7c66cf 3539 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
9ba5705a 3540 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
26fe8ffe 3541 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
a1b535bd 3542 }
cce889b9 3543 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
8bdd16b4 3544 continuation_items = try_get(
cce889b9 3545 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
a1b535bd 3546 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3547 video_items_renderer = None
3548 for key, value in continuation_item.items():
3549 if key not in known_renderers:
8bdd16b4 3550 continue
a1b535bd 3551 video_items_renderer = {known_renderers[key][1]: continuation_items}
9ba5705a 3552 continuation_list = [None]
a1b535bd 3553 for entry in known_renderers[key][0](video_items_renderer):
3554 yield entry
9ba5705a 3555 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
a1b535bd 3556 break
3557 if video_items_renderer:
3558 continue
8bdd16b4 3559 break
9558dcec 3560
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose ``selected`` field is True.

    Both ``tabRenderer`` and ``expandableTabRenderer`` tabs are considered.
    Raises ExtractorError when no tab is selected.
    """
    renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next(
        (renderer for renderer in renderers if renderer.get('selected') is True),
        None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected
b82f815f 3569
@classmethod
def _extract_uploader(cls, data):
    """Build uploader metadata from the playlist sidebar's video owner.

    Returns a dict containing those of ``uploader``, ``uploader_id`` and
    ``uploader_url`` that could be determined. The dict is empty when the
    sidebar has no owner.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}
    fields = {
        'uploader': owner.get('text'),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    # Fields that could not be determined are left out entirely
    return {name: value for name, value in fields.items() if value is not None}
8bdd16b4 3584
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for a tab-based page (channel tab, playlist, ...).

    @param item_id: fallback playlist ID, used when the metadata has no channel ID
    @param ytcfg:   passed through to ``_entries`` and the account/visitor helpers
    @param data:    page data; ``metadata``, ``header`` and the sidebar are read from it
    @param tabs:    list of tab dicts; the selected one supplies the entries
    @returns:       the value of ``playlist_result`` for the selected tab's
                    entries together with the collected metadata
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    thumbnails_list = []
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        # No channel metadata: fall back to playlist metadata
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Stays None unless channel metadata was found above
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only thumbnails that are dicts with a valid URL
    thumbnails = []
    for t in thumbnails_list:
        if not isinstance(t, dict):
            continue
        thumbnail_url = url_or_none(t.get('url'))
        if not thumbnail_url:
            continue
        thumbnails.append({
            'url': thumbnail_url,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        })
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    # The selected tab's title/expandedText fields are appended to the playlist title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')
    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        # May overwrite the (empty) uploader fields with sidebar owner information
        metadata.update(self._extract_uploader(data))
    # Must run after the uploader update above so the channel fields mirror the final values
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id, ytcfg,
            self._extract_account_syncid(ytcfg, data),
            self._extract_visitor_data(data, ytcfg)),
        **metadata)
73c4ac2c 3659
def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of a Mix playlist, requesting further pages as needed.

    Each page is fetched through the ``next`` endpoint, starting from the
    last video of the previous page. Iteration stops when a page has no
    videos, when a page has nothing after the last video already yielded,
    when the first video of the Mix shows up again, or when a response does
    not contain a playlist.

    @param playlist:    playlist renderer dict of the first page
    @param playlist_id: ID of the Mix; sent with every page request
    @param data:        page data, used for account sync ID and visitor data
    @param ytcfg:       passed through to the header/response helpers
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page;
        # on the first page last_id is None, so this starts at index 0
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item need not be a playlistPanelVideoRenderer; default to
        # an empty dict so the fallbacks in `query` apply instead of
        # failing on `None.get`
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No playlist in the response: end the Mix instead of indexing into None
            return
cd7c66cf 3694
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return the result for a playlist shown next to a video.

    If the playlist links to a URL other than *url*, a URL result for
    ``YoutubeTabIE`` is returned. Otherwise the playlist is extracted
    here as a Mix.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    playlist_url = urljoin(url, try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))

    # Delegating everything except mix playlists to regular tab-based playlist URL
    has_own_url = bool(playlist_url) and playlist_url != url
    if not has_own_url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3712
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its sidebar.
    Labels come from the sidebar badges and, if present, from the selected
    entry of the privacy dropdown.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    selected_entry = next(
        (entry for entry in dropdown_entries
         if try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool)),
        None)
    if selected_entry is not None:
        label = self._get_text(selected_entry, ('privacyDropdownItemRenderer', 'label'))
        if label:
            badge_labels.add(label.lower())

    # What each recognised label says about the playlist
    label_effects = {
        'unlisted': {'is_unlisted': True},
        'private': {'is_private': True},
        'public': {'is_unlisted': False, 'is_private': False},
    }
    state = {'is_private': None, 'is_unlisted': None}
    for badge_label in badge_labels:
        state.update(label_effects.get(badge_label, {}))
    return self._availability(state['is_private'], False, False, False, state['is_unlisted'])
3744
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy *info_renderer* entry of the playlist sidebar.

    @param data:          response containing ``sidebar.playlistSidebarRenderer.items``
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type passed to ``try_get`` for the looked-up value
    Returns None when no sidebar item has such an entry.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next((candidate for candidate in candidates if candidate), None)
3753
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again with unavailable videos included.
    The browse ID and params are taken from the sidebar's
    'show unavailable videos' menu item; if that item is not found,
    built-in defaults are sent instead.
    Returns None without making a request when there is no primary sidebar renderer.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    for menu_item in try_get(
            sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    return self._extract_response(
        item_id=item_id,
        headers=self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(data, ytcfg)),
        query={
            'params': browse_endpoint.get('params') or 'wgYCCAA=',
            'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
        },
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 3789
def _extract_webpage(self, url, item_id, fatal=True):
    """Download *url* and extract its initial data, retrying on incomplete results.

    Up to ``extractor_retries`` (default 3) retries are made after the first
    attempt.

    @param url:     page to download
    @param item_id: identifier passed to the download/extraction helpers
    @param fatal:   when true, failures raise ExtractorError; otherwise they
                    are reported as warnings and the loop stops
    @returns:       (webpage, data) from the last attempt; either may be None
                    if that attempt failed before producing it
    """
    retries = self.get_param('extractor_retries', 3)
    # Starts at -1 so that the first attempt is number 0 (not a retry)
    count = -1
    webpage = data = last_error = None
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            # Network errors are retried, except HTTP 403 and 429 responses
            if isinstance(e.cause, network_exceptions):
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            # Not retryable, or out of retries
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break
        else:
            try:
                self._extract_and_report_alerts(data)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                break

            # The data counts as complete once it has one of these keys
            if dict_get(data, ('contents', 'currentVideoEndpoint')):
                break

            last_error = 'Incomplete yt initial data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                break

    return webpage, data
3835
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the page data for url, preferring the webpage and falling back to the API.

    Unless 'webpage' is listed in the 'skip' extractor argument, the webpage
    is downloaded first (its failure is fatal only if webpage_fatal is set)
    and ytcfg is taken from it when not supplied. If that yields no data,
    the tab endpoint API is queried instead. For authenticated sessions
    without a ytcfg, an error is raised (when fatal and 'authcheck' is not
    skipped) or a warning is issued before the API fallback.

    Returns a (data, ytcfg) tuple.
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
    if data:
        return data, ytcfg

    if not ytcfg and self.is_authenticated:
        auth_msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        if 'authcheck' not in self._configuration_arg('skip') and fatal:
            raise ExtractorError(
                auth_msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(auth_msg, only_once=True)

    api_data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return api_data, ytcfg
3852
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve url through the 'navigation/resolve_url' API and fetch the page it points to.

    The resolved endpoint is looked up as a 'browseEndpoint' (queried via
    'browse') and then as a 'watchEndpoint' (queried via 'next'); the first
    one present decides the follow-up request, whose response is returned.
    If neither is present, ExtractorError is raised when fatal, otherwise a
    warning is reported and None is returned.
    """
    api_headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=api_headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query), in priority order
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        ep_query = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not ep_query:
            continue
        return self._extract_response(
            item_id=item_id, query=ep_query, ep=api_ep, headers=api_headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint'))

    message = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(message, expected=True)
    self.report_warning(message, item_id)
3870
@staticmethod
def _smuggle_data(entries, data):
    """
    Lazily yield every entry, smuggling data into its 'url' when data is truthy.

    Entries are modified in place; with falsy data they pass through untouched.
    """
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
3877
3878 _SEARCH_PARAMS = None
3879
def _search_results(self, query, params=NO_DEFAULT):
    """
    Yield search result entries for query, one API page after another.

    params defaults to the class attribute _SEARCH_PARAMS and is only sent
    when truthy. Paging continues for as long as _extract_entries leaves a
    continuation in the shared one-element list.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request = {'query': query}
    if params:
        request['params'] = params

    # One-element list used as an in/out slot for the next page's continuation
    continuation_list = [None]
    for page_num in itertools.count(1):
        request.update(continuation_list[0] or {})
        response = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
            check_get_keys=('contents', 'onResponseReceivedCommands'))
        # The first page and the continuation pages keep their items in different places
        result_items = try_get(
            response,
            (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
             lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
            list)
        yield from self._extract_entries({'contents': result_items}, continuation_list)
        if not continuation_list[0]:
            return
3900
3901
# Extractor for YouTube "tab" style pages: channel/user tabs, feeds, hashtags
# and playlist URLs. URLs that YoutubeIE accepts are left to YoutubeIE.
class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
    IE_DESC = 'YouTube Tabs'
    _VALID_URL = r'''(?x:
        https?://
            (?:\w+\.)?
            (?:
                youtube(?:kids)?\.com|
                %(invidious)s
            )/
            (?:
                (?P<channel_type>channel|c|user|browse)/|
                (?P<not_channel>
                    feed/|hashtag/|
                    (?:playlist|watch)\?.*?\blist=
                )|
                (?!(?:%(reserved_names)s)\b)  # Direct URLs
            )
            (?P<id>[^/?\#&]+)
    )''' % {
        'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:tab'

    _TESTS = [{
        'note': 'playlists, multipage',
        'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
        'playlist_mincount': 94,
        'info_dict': {
            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'title': 'Игорь Клейнер - Playlists',
            'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
            'uploader': 'Игорь Клейнер',
            'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
        },
    }, {
        'note': 'playlists, multipage, different order',
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 94,
        'info_dict': {
            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'title': 'Игорь Клейнер - Playlists',
            'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
            'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'uploader': 'Игорь Клейнер',
        },
    }, {
        'note': 'playlists, series',
        'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'UCYO_jab_esuFRV4b17AJtAw',
            'title': '3Blue1Brown - Playlists',
            'description': 'md5:e1384e8a133307dd10edee76e875d62f',
            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
            'uploader': '3Blue1Brown',
        },
    }, {
        'note': 'playlists, singlepage',
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
            'title': 'ThirstForScience - Playlists',
            'description': 'md5:609399d937ea957b0f53cbffb747a14c',
            'uploader': 'ThirstForScience',
            'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
        }
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }, {
        'note': 'basic, single video playlist',
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
        },
        'playlist_count': 1,
    }, {
        'note': 'empty playlist',
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
    }, {
        'note': 'Home tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Home',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 2,
    }, {
        'note': 'Videos tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Videos',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 975,
    }, {
        'note': 'Videos tab, sorted by popular',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Videos',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 199,
    }, {
        'note': 'Playlists tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Playlists',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 17,
    }, {
        'note': 'Community tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Community',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 18,
    }, {
        'note': 'Channels tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Channels',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 12,
    }, {
        'note': 'Search tab',
        'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
        'playlist_mincount': 40,
        'info_dict': {
            'id': 'UCYO_jab_esuFRV4b17AJtAw',
            'title': '3Blue1Brown - Search - linear algebra',
            'description': 'md5:e1384e8a133307dd10edee76e875d62f',
            'uploader': '3Blue1Brown',
            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
        },
    }, {
        'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
            'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
        },
        'playlist_count': 96,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 1123,
    }, {
        'note': 'even larger playlist, 8832 videos',
        'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
        'only_matching': True,
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
        },
        'playlist_mincount': 21,
    }, {
        'note': 'Playlist with "show unavailable videos" button',
        'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
        'info_dict': {
            'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
            'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
            'uploader': 'Phim Siêu Nhân Nhật Bản',
            'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
        },
        'playlist_mincount': 200,
    }, {
        'note': 'Playlist with unavailable videos in page 7',
        'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
        'info_dict': {
            'title': 'Uploads from BlankTV',
            'id': 'UU8l9frL61Yl5KFOl87nIm2w',
            'uploader': 'BlankTV',
            'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
        },
        'playlist_mincount': 1000,
    }, {
        'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
            'uploader': 'Computerphile',
            'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'only_matching': True,
    }, {
        'note': 'Playlist URL that does not actually serve a playlist',
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
        'info_dict': {
            'id': '3yImotZU3tw',  # This will keep changing
            'ext': 'mp4',
            'title': compat_str,
            'uploader': 'Sky News',
            'uploader_id': 'skynews',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
            'upload_date': r're:\d{8}',
            'description': compat_str,
            'categories': ['News & Politics'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
    }, {
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'note': 'A channel that is not live. Should raise error',
        'url': 'https://www.youtube.com/user/numberphile/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/trending',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/library',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/history',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/subscriptions',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/watch_later',
        'only_matching': True,
    }, {
        'note': 'Recommended - redirects to home page.',
        'url': 'https://www.youtube.com/feed/recommended',
        'only_matching': True,
    }, {
        'note': 'inline playlist with not always working continuations',
        'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/course',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/zsecurity',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/NASAgovVideo/videos',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/hashtag/cctv9',
        'info_dict': {
            'id': 'cctv9',
            'title': '#cctv9',
        },
        'playlist_mincount': 350,
    }, {
        'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
        'only_matching': True,
    }, {
        'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
        'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'only_matching': True
    }, {
        'note': '/browse/ should redirect to /channel/',
        'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
        'only_matching': True
    }, {
        'note': 'VLPL, should redirect to playlist?list=PL...',
        'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'info_dict': {
            'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
            'uploader': 'NoCopyrightSounds',
            'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
            'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'title': 'NCS Releases',
        },
        'playlist_mincount': 166,
    }, {
        'note': 'Topic, should redirect to playlist?list=UU...',
        'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
        'info_dict': {
            'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
            'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'title': 'Uploads from Royalty Free Music - Topic',
            'uploader': 'Royalty Free Music - Topic',
        },
        'expected_warnings': [
            'A channel/user page was given',
            'The URL does not have a videos tab',
        ],
        'playlist_mincount': 101,
    }, {
        'note': 'Topic without a UU playlist',
        'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
        'info_dict': {
            'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
            'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
        },
        'expected_warnings': [
            'A channel/user page was given',
            'The URL does not have a videos tab',
            'Falling back to channel URL',
        ],
        'playlist_mincount': 9,
    }, {
        'note': 'Youtube music Album',
        'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
        'info_dict': {
            'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
            'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
        },
        'playlist_count': 50,
    }, {
        'note': 'unlisted single video playlist',
        'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
        'info_dict': {
            'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
            'uploader': 'colethedj',
            'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
            'title': 'yt-dlp unlisted playlist test',
            'availability': 'unlisted'
        },
        'playlist_count': 1,
    }, {
        'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
        'url': 'https://www.youtube.com/feed/recommended',
        'info_dict': {
            'id': 'recommended',
            'title': 'recommended',
        },
        'playlist_mincount': 50,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }, {
        'note': 'API Fallback: /videos tab, sorted by oldest first',
        'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
        'info_dict': {
            'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
            'title': 'Cody\'sLab - Videos',
            'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
            'uploader': 'Cody\'sLab',
            'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
        },
        'playlist_mincount': 650,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }, {
        'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
        'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
        'info_dict': {
            'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
            'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'title': 'Uploads from Royalty Free Music - Topic',
            'uploader': 'Royalty Free Music - Topic',
        },
        'expected_warnings': [
            'A channel/user page was given',
            'The URL does not have a videos tab',
        ],
        'playlist_mincount': 101,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }]

    @classmethod
    def suitable(cls, url):
        # Anything YoutubeIE can handle is left to YoutubeIE
        if YoutubeIE.suitable(url):
            return False
        return super(YoutubeTabIE, cls).suitable(url)

    def _real_extract(self, url):
        # Unsmuggle the URL, run the actual extraction, then re-smuggle the
        # data into every resulting entry so it reaches the entries' extractors
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = self.__real_extract(url, smuggled_data)
        entries = info_dict.get('entries')
        if entries:
            info_dict['entries'] = self._smuggle_data(entries, smuggled_data)
        return info_dict

    # Splits a matching URL into 'pre' (everything _VALID_URL matches), an optional
    # '/tab' component (only attempted when channel_type matched) and the 'post' remainder
    _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$')

    def __real_extract(self, url, smuggled_data):
        # Normalizes the URL (host, tab name, music-specific IDs), downloads the
        # page data and dispatches to tab, playlist or single-video handling
        item_id = self._match_id(url)
        parsed_url = compat_urlparse.urlparse(url)
        url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])
        channel_redirect = 'no-youtube-channel-redirect' not in compat_opts

        def get_mobj(url):
            # Unmatched groups become '' so they can be sliced/compared as strings
            groups = self._URL_RE.match(url).groupdict()
            return {name: '' if value is None else value for name, value in groups.items()}

        mobj, redirect_warning = get_mobj(url), None
        pre, post = mobj['pre'], mobj['post']
        # Youtube returns incomplete data if tabname is not lower case
        tab = mobj['tab'].lower()
        is_channel = not mobj['not_channel']
        if is_channel and smuggled_data.get('is_music_url'):
            id_prefix = item_id[:2]
            if id_prefix == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif id_prefix == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(
                    mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                    get_all=False, expected_type=compat_str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

        if is_channel and not tab and channel_redirect:
            # Home URLs should redirect to /videos/
            redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
            url = f'https://www.youtube.com/playlist?list={playlist_id}'
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
                return self.url_result(
                    f'https://www.youtube.com/watch?v={video_id}',
                    ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

        data, ytcfg = self._extract_data(url, item_id)

        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if channel_redirect:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                requested_tab = mobj['tab'][1:]
                if mobj['tab'] == '/videos' and tab_name.lower() != requested_tab:
                    redirect_warning = f'The URL does not have a {requested_tab} tab'
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
                        except ExtractorError:
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, tab_name = pl_id, pl_url, requested_tab
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if tab_name.lower() != requested_tab:
                        redirect_warning += f'. {tab_name} tab is being downloaded instead'

        if redirect_warning:
            self.report_warning(redirect_warning)
        self.write_debug(f'Final URL: {url}')

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
        self._extract_and_report_alerts(data, only_once=True)

        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            return self._extract_from_tabs(item_id, ytcfg, data, tabs)

        playlist = traverse_obj(
            data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

        video_id = traverse_obj(
            data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
        if not video_id:
            raise ExtractorError('Unable to recognize tab page')

        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 4513
c5e8d7af 4514
# Accepts bare playlist IDs and any youtube URL carrying a list= parameter
# that YoutubeTabIE does not already handle, and delegates them to YoutubeTabIE.
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # YoutubeTabIE takes precedence over this extractor
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): parse_qs is imported locally although the module also imports
        # it at the top; presumably deliberate so this method is self-contained -
        # confirm before removing
        from ..utils import parse_qs
        # URLs that also carry a video ID (v=...) are not treated as playlists here
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        # Rewrite to a canonical /playlist URL (keeping the original query when
        # there is one) and hand the result over to YoutubeTabIE
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4600
4601
class YoutubeYtBeIE(InfoExtractor):
    """Extractor for youtu.be short links that also carry a ``list`` parameter.

    The matched video id and playlist id are turned into a regular
    ``youtube.com/watch`` URL which is handed to YoutubeTabIE.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the equivalent ``/watch?v=...&list=...`` URL."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        # The result is keyed by the playlist id, not the video id.
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4640
4641
class YoutubeYtUserIE(InfoExtractor):
    """Extractor for the ``ytuser:<name>`` shorthand.

    Everything after the ``ytuser:`` prefix is taken as the user id and
    mapped to that user's ``/videos`` page, which is handed to YoutubeTabIE.
    """
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the user's videos page."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(
            videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4655
b05654f0 4656
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the ``:ytfav`` keyword and its spelling variants.

    Always resolves to the ``LL`` playlist URL, which is handed to
    YoutubeTabIE.
    """
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the ``LL`` playlist; ``url`` itself is not used."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
4674
4675
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` key.

    This class only supplies configuration; the behaviour comes from its
    base classes.
    """
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = []
b05654f0 4682
a61fd4cf 4683
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` key.

    Its IE_NAME is derived from YoutubeSearchIE.IE_NAME with a ``:date``
    suffix. This class only supplies configuration.
    """
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
75dff0ee 4689
c9ae7b95 4690
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extractor for ``youtube.com/results`` search page URLs.

    The search terms are read from the ``search_query`` (or ``q``) query
    parameter, and the optional ``sp`` parameter is forwarded to the search.
    """
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result whose id and title are both the search terms."""
        params = parse_qs(url)
        # ``search_query`` wins over ``q`` when both are present.
        search_terms = (params.get('search_query') or params.get('q'))[0]
        sp_value = params.get('sp', (None,))[0]
        entries = self._search_results(search_terms, sp_value)
        return self.playlist_result(entries, search_terms, search_terms)
3462ffa8 4719
4720
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Shared base for the feed extractors.
    A subclass has to provide _FEED_NAME; it is used both to build IE_NAME
    and to build the ``/feed/<name>`` URL that is handed to YoutubeTabIE.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name, ``youtube:`` followed by the feed name."""
        return 'youtube:{0}'.format(self._FEED_NAME)

    def _real_extract(self, url):
        """Return a url_result for this feed's page; ``url`` itself is not used."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4737
4738
class YoutubeWatchLaterIE(InfoExtractor):
    """Extractor for the ``:ytwatchlater`` keyword.

    Always resolves to the ``WL`` playlist URL, which is handed to
    YoutubeTabIE.
    """
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the ``WL`` playlist; ``url`` itself is not used."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4751
4752
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``recommended`` feed.

    Matches the bare youtube.com front page as well as the ``:ytrec`` /
    ``:ytrecommended`` keywords. It overrides the base class's
    _LOGIN_REQUIRED with False.
    """
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4768
1ed5b5c9 4769
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed (``:ytsubs`` keyword family)."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4781
1ed5b5c9 4782
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed (``:ythis`` / ``:ythistory`` keywords)."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4791
4792
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch YouTube URLs that look cut off, and explain why.

    The pattern matches ``/watch?`` URLs that end after at most one of a
    few known non-video parameters, and ``attribution_link?a=...`` URLs
    that end after the ``a`` parameter. Extraction never succeeds: it always
    raises an expected ExtractorError telling the user to quote the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a quoting hint.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        # The example commands name this program (yt-dlp) so that the
        # suggestion can be copied and run as-is.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4840
4841
class YoutubeClipIE(InfoExtractor):
    """Placeholder extractor for ``youtube.com/clip/`` URLs.

    It emits a warning that clips are unsupported and passes the URL,
    unchanged, on to the 'Generic' extractor.
    """
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Warn about missing clip support, then return a url_result for ``url``."""
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        return self.url_result(url, 'Generic')
4850
4851
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch ``/watch?v=`` URLs whose video id is only 1 to 10 characters long.

    Extraction never succeeds: it always raises an expected ExtractorError
    that names the incomplete id and the URL.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError for the truncated id."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)