]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[tiktok] Fix `extractor_key` used in archive
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
109dd3b2 6import copy
fe93e2c4 7import datetime
adbc4ec4 8import functools
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
720c3099 12import math
c4417ddb 13import os.path
d77ab8e2 14import random
c5e8d7af 15import re
46383212 16import sys
8a784c74 17import time
e0df6211 18import traceback
adbc4ec4 19import threading
c5e8d7af 20
b05654f0 21from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 22from ..compat import (
edf3e38e 23 compat_chr,
29f7c58a 24 compat_HTTPError,
c5e8d7af 25 compat_parse_qs,
545cc85d 26 compat_str,
7fd002c0 27 compat_urllib_parse_unquote_plus,
15707c7e 28 compat_urllib_parse_urlencode,
7c80519c 29 compat_urllib_parse_urlparse,
7c61bd36 30 compat_urlparse,
4bb4a188 31)
545cc85d 32from ..jsinterp import JSInterpreter
4bb4a188 33from ..utils import (
720c3099 34 bug_reports_message,
c5e8d7af 35 clean_html,
d92f5d5a 36 datetime_from_str,
11f9be09 37 dict_get,
358de58c 38 error_to_compat_str,
c5e8d7af 39 ExtractorError,
2d30521a 40 float_or_none,
11f9be09 41 format_field,
dd27fd17 42 int_or_none,
641ad5d8 43 is_html,
34921b43 44 join_nonempty,
94278f72 45 mimetype2ext,
9c0d7f49 46 network_exceptions,
a6213a49 47 NO_DEFAULT,
11f9be09 48 orderedSet,
6310acf5 49 parse_codecs,
49bd8c66 50 parse_count,
7c80519c 51 parse_duration,
7ea65411 52 parse_iso8601,
4dfbf869 53 parse_qs,
dca3ff4a 54 qualities,
c0ac49bc 55 remove_end,
3995d37d 56 remove_start,
cf7e015f 57 smuggle_url,
dbdaaa23 58 str_or_none,
c93d53f5 59 str_to_int,
f3aa3c3f 60 strftime_or_none,
7c365c21 61 traverse_obj,
556dbe7f 62 try_get,
c5e8d7af
PH
63 unescapeHTML,
64 unified_strdate,
cf7e015f 65 unsmuggle_url,
8bdd16b4 66 update_url_query,
21c340b8 67 url_or_none,
fe93e2c4 68 urljoin,
7c365c21 69 variadic,
c5e8d7af
PH
70)
71
5f6a1245 72
def get_first(obj, keys, **kwargs):
    """
    Look up `keys` under every branch of `obj` via traverse_obj().

    The path handed to traverse_obj() is `...` followed by `keys`, and
    get_all=False is always passed; all other keyword arguments are
    forwarded unchanged.
    """
    path = (..., *variadic(keys))
    return traverse_obj(obj, path, **kwargs, get_all=False)
75
76
000c15a4 77# any clients starting with _ cannot be explicitly requested by the user
78INNERTUBE_CLIENTS = {
79 'web': {
80 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
81 'INNERTUBE_CONTEXT': {
82 'client': {
83 'clientName': 'WEB',
84 'clientVersion': '2.20210622.10.00',
85 }
86 },
87 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
88 },
89 'web_embedded': {
90 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
91 'INNERTUBE_CONTEXT': {
92 'client': {
93 'clientName': 'WEB_EMBEDDED_PLAYER',
94 'clientVersion': '1.20210620.0.1',
95 },
96 },
97 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
98 },
99 'web_music': {
100 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
101 'INNERTUBE_HOST': 'music.youtube.com',
102 'INNERTUBE_CONTEXT': {
103 'client': {
104 'clientName': 'WEB_REMIX',
105 'clientVersion': '1.20210621.00.00',
106 }
107 },
108 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
109 },
e7e94f2a
D
110 'web_creator': {
111 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
112 'INNERTUBE_CONTEXT': {
113 'client': {
114 'clientName': 'WEB_CREATOR',
115 'clientVersion': '1.20210621.00.00',
116 }
117 },
118 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
119 },
000c15a4 120 'android': {
121 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
122 'INNERTUBE_CONTEXT': {
123 'client': {
124 'clientName': 'ANDROID',
125 'clientVersion': '16.20',
126 }
127 },
128 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
b6de707d 129 'REQUIRE_JS_PLAYER': False
000c15a4 130 },
131 'android_embedded': {
132 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
133 'INNERTUBE_CONTEXT': {
134 'client': {
135 'clientName': 'ANDROID_EMBEDDED_PLAYER',
136 'clientVersion': '16.20',
137 },
138 },
b6de707d 139 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
140 'REQUIRE_JS_PLAYER': False
000c15a4 141 },
142 'android_music': {
143 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
144 'INNERTUBE_HOST': 'music.youtube.com',
145 'INNERTUBE_CONTEXT': {
146 'client': {
147 'clientName': 'ANDROID_MUSIC',
148 'clientVersion': '4.32',
149 }
150 },
151 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
b6de707d 152 'REQUIRE_JS_PLAYER': False
000c15a4 153 },
e7e94f2a
D
154 'android_creator': {
155 'INNERTUBE_CONTEXT': {
156 'client': {
157 'clientName': 'ANDROID_CREATOR',
158 'clientVersion': '21.24.100',
159 },
160 },
b6de707d 161 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
162 'REQUIRE_JS_PLAYER': False
e7e94f2a 163 },
3619f78d 164 # ios has HLS live streams
165 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
000c15a4 166 'ios': {
167 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
168 'INNERTUBE_CONTEXT': {
169 'client': {
170 'clientName': 'IOS',
171 'clientVersion': '16.20',
172 }
173 },
b6de707d 174 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
175 'REQUIRE_JS_PLAYER': False
000c15a4 176 },
177 'ios_embedded': {
178 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
179 'INNERTUBE_CONTEXT': {
180 'client': {
181 'clientName': 'IOS_MESSAGES_EXTENSION',
182 'clientVersion': '16.20',
183 },
184 },
b6de707d 185 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
186 'REQUIRE_JS_PLAYER': False
000c15a4 187 },
188 'ios_music': {
189 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
190 'INNERTUBE_HOST': 'music.youtube.com',
191 'INNERTUBE_CONTEXT': {
192 'client': {
193 'clientName': 'IOS_MUSIC',
194 'clientVersion': '4.32',
195 },
196 },
b6de707d 197 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
198 'REQUIRE_JS_PLAYER': False
000c15a4 199 },
e7e94f2a
D
200 'ios_creator': {
201 'INNERTUBE_CONTEXT': {
202 'client': {
203 'clientName': 'IOS_CREATOR',
204 'clientVersion': '21.24.100',
205 },
206 },
b6de707d 207 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
208 'REQUIRE_JS_PLAYER': False
e7e94f2a 209 },
3619f78d 210 # mweb has 'ultralow' formats
211 # See: https://github.com/yt-dlp/yt-dlp/pull/557
000c15a4 212 'mweb': {
213 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
214 'INNERTUBE_CONTEXT': {
215 'client': {
216 'clientName': 'MWEB',
217 'clientVersion': '2.20210721.07.00',
218 }
219 },
220 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
221 },
222}
223
224
def build_innertube_clients():
    """
    Fill in defaults and priorities for every entry of INNERTUBE_CLIENTS.

    Each client config gets a default API key, host, REQUIRE_JS_PLAYER flag
    and `hl`, plus a `priority` computed from the part of its name before the
    first underscore.  For each base client an extra `<client>_agegate` deep
    copy is registered with clientScreen set to EMBED.
    """
    embed_context = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    base_names = ('android', 'web', 'ios', 'mweb')
    rank = qualities(base_names[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot since *_agegate entries are added while looping
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, value in defaults:
            cfg.setdefault(key, value)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        family = name.split('_', 1)[0]
        cfg['priority'] = 10 * rank(family)

        if name in base_names:
            agegate_cfg = copy.deepcopy(cfg)
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_cfg
            agegate_context = agegate_cfg['INNERTUBE_CONTEXT']
            agegate_context['client']['clientScreen'] = 'EMBED'
            agegate_context['thirdParty'] = embed_context
            agegate_cfg['priority'] -= 1
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3
249
250
251build_innertube_clients()
252
253
de7f3446 254class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 255 """Provide base functions for Youtube extractors"""
e00eb564 256
3462ffa8 257 _RESERVED_NAMES = (
3cd786db 258 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
3619f78d 259 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
260 r'browse|oembed|get_video_info|iframe_api|s/player|'
cd7c66cf 261 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 262
3619f78d 263 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
264
b2e8bc1b 265 _NETRC_MACHINE = 'youtube'
3619f78d 266
b2e8bc1b
JMF
267 # If True it will raise an error if no login info is provided
268 _LOGIN_REQUIRED = False
269
d9190e44
RH
270 _INVIDIOUS_SITES = (
271 # invidious-redirect websites
272 r'(?:www\.)?redirect\.invidious\.io',
273 r'(?:(?:www|dev)\.)?invidio\.us',
274 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
275 r'(?:www\.)?invidious\.pussthecat\.org',
276 r'(?:www\.)?invidious\.zee\.li',
277 r'(?:www\.)?invidious\.ethibox\.fr',
278 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
279 # youtube-dl invidious instances list
280 r'(?:(?:www|no)\.)?invidiou\.sh',
281 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
282 r'(?:www\.)?invidious\.kabi\.tk',
283 r'(?:www\.)?invidious\.mastodon\.host',
284 r'(?:www\.)?invidious\.zapashcanon\.fr',
285 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
286 r'(?:www\.)?invidious\.tinfoil-hat\.net',
287 r'(?:www\.)?invidious\.himiko\.cloud',
288 r'(?:www\.)?invidious\.reallyancient\.tech',
289 r'(?:www\.)?invidious\.tube',
290 r'(?:www\.)?invidiou\.site',
291 r'(?:www\.)?invidious\.site',
292 r'(?:www\.)?invidious\.xyz',
293 r'(?:www\.)?invidious\.nixnet\.xyz',
294 r'(?:www\.)?invidious\.048596\.xyz',
295 r'(?:www\.)?invidious\.drycat\.fr',
296 r'(?:www\.)?inv\.skyn3t\.in',
297 r'(?:www\.)?tube\.poal\.co',
298 r'(?:www\.)?tube\.connect\.cafe',
299 r'(?:www\.)?vid\.wxzm\.sx',
300 r'(?:www\.)?vid\.mint\.lgbt',
301 r'(?:www\.)?vid\.puffyan\.us',
302 r'(?:www\.)?yewtu\.be',
303 r'(?:www\.)?yt\.elukerio\.org',
304 r'(?:www\.)?yt\.lelux\.fi',
305 r'(?:www\.)?invidious\.ggc-project\.de',
306 r'(?:www\.)?yt\.maisputain\.ovh',
307 r'(?:www\.)?ytprivate\.com',
308 r'(?:www\.)?invidious\.13ad\.de',
309 r'(?:www\.)?invidious\.toot\.koeln',
310 r'(?:www\.)?invidious\.fdn\.fr',
311 r'(?:www\.)?watch\.nettohikari\.com',
312 r'(?:www\.)?invidious\.namazso\.eu',
313 r'(?:www\.)?invidious\.silkky\.cloud',
314 r'(?:www\.)?invidious\.exonip\.de',
315 r'(?:www\.)?invidious\.riverside\.rocks',
316 r'(?:www\.)?invidious\.blamefran\.net',
317 r'(?:www\.)?invidious\.moomoo\.de',
318 r'(?:www\.)?ytb\.trom\.tf',
319 r'(?:www\.)?yt\.cyberhost\.uk',
320 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
321 r'(?:www\.)?qklhadlycap4cnod\.onion',
322 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
323 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
324 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
325 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
326 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
327 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
328 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
329 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
330 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
331 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
332 )
333
b2e8bc1b 334 def _login(self):
83317f69 335 """
336 Attempt to log in to YouTube.
83317f69 337 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
338 """
9d5d4d64 339
982ee69a
MB
340 if (self._LOGIN_REQUIRED
341 and self.get_param('cookiefile') is None
342 and self.get_param('cookiesfrombrowser') is None):
9d5d4d64 343 self.raise_login_required(
344 'Login details are needed to download this content', method='cookies')
68217024 345 username, password = self._get_login_info()
9d5d4d64 346 if username:
24b0a72b 347 self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
b2e8bc1b 348
cce889b9 349 def _initialize_consent(self):
350 cookies = self._get_cookies('https://www.youtube.com/')
351 if cookies.get('__Secure-3PSID'):
352 return
353 consent_id = None
354 consent = cookies.get('CONSENT')
355 if consent:
356 if 'YES' in consent.value:
357 return
358 consent_id = self._search_regex(
359 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
360 if not consent_id:
361 consent_id = random.randint(100, 999)
362 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 363
def _initialize_pref(self):
    """
    Write a PREF cookie for .youtube.com with `hl` forced to 'en'.

    Values of an existing PREF cookie are kept when it can be parsed; on a
    ValueError a warning is reported and only `hl` is written.
    """
    existing = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if existing:
        try:
            settings = dict(compat_urlparse.parse_qsl(existing.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    settings['hl'] = 'en'
    self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(settings))
375
b2e8bc1b 376 def _real_initialize(self):
f3aa3c3f 377 self._initialize_pref()
cce889b9 378 self._initialize_consent()
24b0a72b 379 self._login()
c5e8d7af 380
a0566bbf 381 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 382 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
383 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 384
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for `client`."""
    template = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(template)
109dd3b2 387
def _get_innertube_host(self, client='web'):
    """Return the INNERTUBE_HOST configured for `client` in INNERTUBE_CLIENTS."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
109dd3b2 390
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get() on `ytcfg`, falling back to the default ytcfg of
    `default_client` when the first lookup gives a falsy result.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
395
def _extract_client_name(self, ytcfg, default_client='web'):
    """
    Return the client name from `ytcfg` (INNERTUBE_CLIENT_NAME, else the
    context's clientName), falling back to the defaults of `default_client`.
    """
    name_getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, compat_str, default_client)
109dd3b2 400
def _extract_client_version(self, ytcfg, default_client='web'):
    """
    Return the client version from `ytcfg` (INNERTUBE_CLIENT_VERSION, else
    the context's clientVersion), falling back to the defaults of
    `default_client`.
    """
    version_getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, compat_str, default_client)
109dd3b2 405
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return INNERTUBE_API_KEY from `ytcfg`, else from the defaults of `default_client`."""
    def api_key_getter(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_getter, compat_str, default_client)
408
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the INNERTUBE_CONTEXT dict looked up in `ytcfg` and in the
    default ytcfg of `default_client` (in that order, via get_first()).

    `hl` is set to 'en' on the context's `client` dict when it has one.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section['hl'] = 'en'
    return context
415
cf87314d 416 _SAPISID = None
417
109dd3b2 418 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 419 time_now = round(time.time())
cf87314d 420 if self._SAPISID is None:
421 yt_cookies = self._get_cookies('https://www.youtube.com')
422 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
423 # See: https://github.com/yt-dlp/yt-dlp/issues/393
424 sapisid_cookie = dict_get(
425 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
426 if sapisid_cookie and sapisid_cookie.value:
427 self._SAPISID = sapisid_cookie.value
428 self.write_debug('Extracted SAPISID cookie')
429 # SAPISID cookie is required if not already present
430 if not yt_cookies.get('SAPISID'):
431 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
432 self._set_cookie(
433 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
434 else:
435 self._SAPISID = False
436 if not self._SAPISID:
437 return None
1974e99f 438 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
439 sapisidhash = hashlib.sha1(
cf87314d 440 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
1974e99f 441 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
442
443 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 444 note='Downloading API JSON', errnote='Unable to download API page',
000c15a4 445 context=None, api_key=None, api_hostname=None, default_client='web'):
f4f751af 446
109dd3b2 447 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 448 data.update(query)
11f9be09 449 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 450 real_headers.update({'content-type': 'application/json'})
451 if headers:
452 real_headers.update(headers)
545cc85d 453 return self._download_json(
109dd3b2 454 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 455 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 456 data=json.dumps(data).encode('utf8'), headers=real_headers,
457 query={'key': api_key or self._extract_api_key()})
458
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """
    Search `webpage` for the ytInitialData JSON and return it parsed.

    The pattern followed by _YT_INITIAL_BOUNDARY_RE is listed before the
    bare _YT_INITIAL_DATA_RE.  Returns None when the search yields nothing.
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE,
    )
    raw = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not raw:
        return None
    return self._parse_json(raw, item_id, fatal=fatal)
0c148415 465
99e9e001 466 @staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first SESSION_INDEX among `data` for which int_or_none()
    gives a value, or None if there is none.
    """
    candidates = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in candidates if index is not None), None)
476
477 # Deprecated?
478 def _extract_identity_token(self, ytcfg=None, webpage=None):
a1c5d2ca
M
479 if ytcfg:
480 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
481 if token:
482 return token
99e9e001 483 if webpage:
484 return self._search_regex(
485 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
486 'identity token', default=None, fatal=False)
a1c5d2ca
M
487
488 @staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated:
            return delegated
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
a1c5d2ca 506
ac56cf38 507 @staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # NOTE(review): this 3-tuple is handed to get_first() as the keys as-is
    # (it is not wrapped in another sequence) - confirm this is the intended
    # traverse_obj path
    visitor_keys = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, visitor_keys, expected_type=str)
ac56cf38 516
99e9e001 517 @property
def is_authenticated(self):
    """Whether _generate_sapisidhash_header() produces a (truthy) header value."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
520
def extract_ytcfg(self, video_id, webpage):
    """
    Parse the JSON argument of a `ytcfg.set({...});` call in `webpage`.

    Returns an empty dict when `webpage` is empty, when no such call is
    found, or when parsing gives a falsy result.
    """
    if not webpage:
        return {}
    raw = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw, video_id, fatal=False)
    return parsed or {}
528
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an innertube API request.

    Explicit arguments take precedence; missing values are extracted from
    `ytcfg`.  The Origin is https://<api_hostname>, or the innertube host of
    `default_client` when no hostname is given.  Authorization and X-Origin
    are added when a SAPISIDHASH header can be generated.  Headers whose
    value is None are left out of the result.
    """
    host = api_hostname if api_hostname else self._get_innertube_host(default_client)
    origin = 'https://' + host

    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Default to the first account when only a syncid is known
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin

    filtered = {}
    for header, value in headers.items():
        if value is not None:
            filtered[header] = value
    return filtered
29f7c58a 553
2d6659b9 554 @staticmethod
555 def _build_api_continuation_query(continuation, ctp=None):
556 query = {
557 'continuation': continuation
558 }
559 # TODO: Inconsistency with clickTrackingParams.
560 # Currently we have a fixed ctp contained within context (from ytcfg)
561 # and a ctp in root query for continuation.
562 if ctp:
563 query['clickTracking'] = {'clickTrackingParams': ctp}
564 return query
565
2d6659b9 566 @classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's nextContinuationData or
    reloadContinuationData.  Returns None when neither is present or it
    carries no `continuation` token.
    """
    data_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    data = try_get(renderer, data_getters, dict)
    if not data:
        return None
    token = data.get('continuation')
    if not token:
        return None
    return cls._build_api_continuation_query(token, data.get('clickTrackingParams'))
2d6659b9 578
579 @classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict
    (continuationCommand.token plus clickTrackingParams).  Returns None when
    `continuation_ep` is not a dict or has no token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
2d6659b9 588
589 @classmethod
def _extract_continuation(cls, renderer):
    """
    Return a continuation query for `renderer`, or None if there is none.

    The renderer's own next/reload continuation data is preferred; otherwise
    the entries under `contents` and `items` are scanned for a
    continuationItemRenderer endpoint or button command.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []
    ]
    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(entry, endpoint_getters, dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query
609
fe93e2c4 610 @classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) pairs for the entries of data['alerts'].

    Entries that are not dicts are skipped, as are alert values that are not
    dicts, have no `type`, or have no text.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            if not isinstance(alert, dict):
                # Skip malformed values rather than fail on .get() below
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
622
c0ac49bc 623 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
109dd3b2 624 errors = []
625 warnings = []
626 for alert_type, alert_message in alerts:
641ad5d8 627 if alert_type.lower() == 'error' and fatal:
109dd3b2 628 errors.append([alert_type, alert_message])
629 else:
630 warnings.append([alert_type, alert_message])
631
632 for alert_type, alert_message in (warnings + errors[:-1]):
c0ac49bc 633 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
109dd3b2 634 if errors:
635 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
636
637 def _extract_and_report_alerts(self, data, *args, **kwargs):
638 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
639
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels under renderer['badges']."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
647
648 @staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of `path_list` in `data`.

    For each candidate object, `simpleText` is preferred; otherwise the
    `text` values of its `runs` (or of the object itself, if it is a list)
    are joined, limited to the first `max_runs` runs when given.  Without
    any path, `data` itself is the candidate.  Returns None if no text is
    found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                # A non-branching path gives a single object, not a list of them
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
47193e02 669
a709d873 670 @staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    """
    results = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.partition('?')[0]
            results.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results
693
f3aa3c3f 694 @staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)'

    Returns None when the text has no "<number> <unit> ago" part or
    datetime_from_str() raises ValueError.
    """
    match = re.search(r'(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
    if not match:
        return None
    offset = 'now-%s%s' % (match.group('time'), match.group('unit'))
    try:
        return datetime_from_str(offset, precision='auto')
    except ValueError:
        return None
706
707 def _extract_time_text(self, renderer, *path_list):
708 text = self._get_text(renderer, *path_list) or ''
709 dt = self.extract_relative_time(text)
710 timestamp = None
711 if isinstance(dt, datetime.datetime):
712 timestamp = calendar.timegm(dt.timetuple())
713 if text and timestamp is None:
714 self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
715 return timestamp, text
716
109dd3b2 717 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
718 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
000c15a4 719 default_client='web'):
109dd3b2 720 response = None
721 last_error = None
722 count = -1
723 retries = self.get_param('extractor_retries', 3)
724 if check_get_keys is None:
725 check_get_keys = []
726 while count < retries:
727 count += 1
728 if last_error:
c0ac49bc 729 self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
109dd3b2 730 try:
731 response = self._call_api(
732 ep=ep, fatal=True, headers=headers,
733 video_id=item_id, query=query,
734 context=self._extract_context(ytcfg, default_client),
735 api_key=self._extract_api_key(ytcfg, default_client),
736 api_hostname=api_hostname, default_client=default_client,
737 note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
738 except ExtractorError as e:
9c0d7f49 739 if isinstance(e.cause, network_exceptions):
641ad5d8 740 if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
741 e.cause.seek(0)
742 yt_error = try_get(
743 self._parse_json(e.cause.read().decode(), item_id, fatal=False),
744 lambda x: x['error']['message'], compat_str)
745 if yt_error:
746 self._report_alerts([('ERROR', yt_error)], fatal=False)
109dd3b2 747 # Downloading page may result in intermittent 5xx HTTP error
748 # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
9c0d7f49 749 # We also want to catch all other network exceptions since errors in later pages can be troublesome
750 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
751 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
526d74ec 752 last_error = error_to_compat_str(e.cause or e.msg)
9c0d7f49 753 if count < retries:
754 continue
109dd3b2 755 if fatal:
756 raise
757 else:
758 self.report_warning(error_to_compat_str(e))
759 return
760
761 else:
109dd3b2 762 try:
ac56cf38 763 self._extract_and_report_alerts(response, only_once=True)
109dd3b2 764 except ExtractorError as e:
c0ac49bc 765 # YouTube servers may return errors we want to retry on in a 200 OK response
766 # See: https://github.com/yt-dlp/yt-dlp/issues/839
767 if 'unknown error' in e.msg.lower():
768 last_error = e.msg
769 continue
109dd3b2 770 if fatal:
771 raise
772 self.report_warning(error_to_compat_str(e))
773 return
774 if not check_get_keys or dict_get(response, check_get_keys):
775 break
776 # Youtube sometimes sends incomplete data
777 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
778 last_error = 'Incomplete data received'
779 if count >= retries:
780 if fatal:
781 raise ExtractorError(last_error)
782 else:
783 self.report_warning(last_error)
784 return
785 return response
786
9297939e 787 @staticmethod
def is_music_url(url):
    """Return True if `url` starts with http(s)://music.youtube.com/."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))
790
def _extract_video(self, renderer):
    """
    Build a `url`-type result dict pointing at YoutubeIE from a video
    renderer, filling in the metadata the renderer carries (title,
    description, duration, view count, uploader, channel id, thumbnails,
    upload date, live status, release timestamp and availability).
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)

    view_count_text = self._get_text(renderer, 'viewCountText') or ''
    # Only the leading digit group counts, after dropping all whitespace
    compact_views = re.sub(r'\s', '', view_count_text)
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', compact_views, 'view count', default=None))

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(
        traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(
            needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }
832
0c148415 833
360e1ca5 834class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 835 IE_DESC = 'YouTube'
cb7dfeea 836 _VALID_URL = r"""(?x)^
c5e8d7af 837 (
edb53e2d 838 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 839 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
840 (?:www\.)?deturl\.com/www\.youtube\.com|
841 (?:www\.)?pwnyoutube\.com|
842 (?:www\.)?hooktube\.com|
843 (?:www\.)?yourepeat\.com|
844 tube\.majestyc\.net|
845 %(invidious)s|
846 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
847 (?:.*?\#/)? # handle anchor (#/) redirect urls
848 (?: # the various things that can precede the ID:
8fc54b12 849 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
c5e8d7af 850 |(?: # or the v= param in all its forms
f7000f3a 851 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 852 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 853 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
854 v=
855 )
f4b05232 856 ))
cbaed4bb
S
857 |(?:
858 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
859 vid\.plus| # or vid.plus/xxxx
860 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 861 %(invidious)s
cbaed4bb 862 )/
edb53e2d 863 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 864 )
c5e8d7af 865 )? # all until now is optional -> you can pass the naked ID
201c1459 866 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 867 (?(1).+)? # if we found the ID, everything can follow
9297939e 868 (?:\#|$)""" % {
d9190e44 869 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 870 }
e40c758c 871 _PLAYER_INFO_RE = (
cc2db878 872 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
873 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 874 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 875 )
2c62dc26 876 _formats = {
c2d3cb4c 877 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
878 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
879 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
880 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
881 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
882 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
883 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
884 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 885 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 886 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
887 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
888 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
889 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
890 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
891 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 892 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 893 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
894 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 895
896
897 # 3D videos
c2d3cb4c 898 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
899 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
900 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
901 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 902 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
903 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
904 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 905
96fb5605 906 # Apple HTTP Live Streaming
11f12195 907 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 908 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
909 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
910 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
911 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
912 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 913 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
914 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
915
916 # DASH mp4 video
d23028a8
S
917 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
918 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
919 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
920 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
921 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 922 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
923 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
924 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
925 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
926 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
927 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
928 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 929
f6f1fc92 930 # Dash mp4 audio
d23028a8
S
931 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
932 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
933 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
934 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
935 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
936 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
937 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
938
939 # Dash webm
d23028a8
S
940 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
941 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
942 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
943 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
944 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
945 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
946 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
947 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
948 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
949 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
950 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
951 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
952 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
953 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
954 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 955 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
956 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
957 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
958 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
959 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
960 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
961 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
962
963 # Dash webm audio
d23028a8
S
964 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
965 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 966
0857baad 967 # Dash webm audio with opus inside
d23028a8
S
968 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
969 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
970 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 971
ce6b9a2d
PH
972 # RTMP (unnamed)
973 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
974
975 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
976 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
977 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
978 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
979 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
980 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
981 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
982 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
983 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 984 }
29f7c58a 985 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 986
fd5c4aab
S
987 _GEO_BYPASS = False
988
78caa52a 989 IE_NAME = 'youtube'
2eb88d95
PH
990 _TESTS = [
991 {
2d3d2997 992 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
993 'info_dict': {
994 'id': 'BaW_jenozKc',
995 'ext': 'mp4',
3867038a 996 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
997 'uploader': 'Philipp Hagemeister',
998 'uploader_id': 'phihag',
ec85ded8 999 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 1000 'channel': 'Philipp Hagemeister',
dd4c4492
S
1001 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1002 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1003 'upload_date': '20121002',
ff9f925b 1004 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1005 'categories': ['Science & Technology'],
3867038a 1006 'tags': ['youtube-dl'],
556dbe7f 1007 'duration': 10,
dbdaaa23 1008 'view_count': int,
3e7c1224 1009 'like_count': int,
ff9f925b 1010 # 'dislike_count': int,
1011 'availability': 'public',
1012 'playable_in_embed': True,
1013 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1014 'live_status': 'not_live',
1015 'age_limit': 0,
7c80519c 1016 'start_time': 1,
297a564b 1017 'end_time': 9,
2eb88d95 1018 }
0e853ca4 1019 },
fccd3771 1020 {
4bc3a23e
PH
1021 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1022 'note': 'Embed-only video (#1746)',
1023 'info_dict': {
1024 'id': 'yZIXLfi8CZQ',
1025 'ext': 'mp4',
1026 'upload_date': '20120608',
1027 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1028 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1029 'uploader': 'SET India',
94bfcd23 1030 'uploader_id': 'setindia',
ec85ded8 1031 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1032 'age_limit': 18,
545cc85d 1033 },
1034 'skip': 'Private video',
fccd3771 1035 },
11b56058 1036 {
8bdd16b4 1037 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1038 'note': 'Use the first video ID in the URL',
1039 'info_dict': {
1040 'id': 'BaW_jenozKc',
1041 'ext': 'mp4',
3867038a 1042 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1043 'uploader': 'Philipp Hagemeister',
1044 'uploader_id': 'phihag',
ec85ded8 1045 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1046 'upload_date': '20121002',
3867038a 1047 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1048 'categories': ['Science & Technology'],
3867038a 1049 'tags': ['youtube-dl'],
556dbe7f 1050 'duration': 10,
dbdaaa23 1051 'view_count': int,
11b56058
PM
1052 'like_count': int,
1053 'dislike_count': int,
34a7de29
S
1054 },
1055 'params': {
1056 'skip_download': True,
1057 },
11b56058 1058 },
dd27fd17 1059 {
2d3d2997 1060 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1061 'note': '256k DASH audio (format 141) via DASH manifest',
1062 'info_dict': {
1063 'id': 'a9LDPn-MO4I',
1064 'ext': 'm4a',
1065 'upload_date': '20121002',
1066 'uploader_id': '8KVIDEO',
ec85ded8 1067 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1068 'description': '',
1069 'uploader': '8KVIDEO',
1070 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1071 },
4bc3a23e
PH
1072 'params': {
1073 'youtube_include_dash_manifest': True,
1074 'format': '141',
4919603f 1075 },
de3c7fe0 1076 'skip': 'format 141 not served anymore',
dd27fd17 1077 },
8bdd16b4 1078 # DASH manifest with encrypted signature
1079 {
1080 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1081 'info_dict': {
1082 'id': 'IB3lcPjvWLA',
1083 'ext': 'm4a',
1084 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1085 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1086 'duration': 244,
1087 'uploader': 'AfrojackVEVO',
1088 'uploader_id': 'AfrojackVEVO',
1089 'upload_date': '20131011',
cc2db878 1090 'abr': 129.495,
8bdd16b4 1091 },
1092 'params': {
1093 'youtube_include_dash_manifest': True,
1094 'format': '141/bestaudio[ext=m4a]',
1095 },
1096 },
65c2fde2 1097 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1098 {
65c2fde2 1099 'note': 'Embed allowed age-gate video',
2d3d2997 1100 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1101 'info_dict': {
1102 'id': 'HtVdAasjOgU',
1103 'ext': 'mp4',
1104 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1105 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1106 'duration': 142,
c522adb1
JMF
1107 'uploader': 'The Witcher',
1108 'uploader_id': 'WitcherGame',
ec85ded8 1109 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1110 'upload_date': '20140605',
34952f09 1111 'age_limit': 18,
c522adb1
JMF
1112 },
1113 },
65c2fde2 1114 {
1115 'note': 'Age-gate video with embed allowed in public site',
1116 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1117 'info_dict': {
1118 'id': 'HsUATh_Nc2U',
1119 'ext': 'mp4',
1120 'title': 'Godzilla 2 (Official Video)',
1121 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1122 'upload_date': '20200408',
1123 'uploader_id': 'FlyingKitty900',
1124 'uploader': 'FlyingKitty',
1125 'age_limit': 18,
1126 },
1127 },
1128 {
1129 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1130 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1131 'info_dict': {
1132 'id': 'Tq92D6wQ1mg',
1133 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1134 'ext': 'mp4',
1135 'upload_date': '20191227',
65c2fde2 1136 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1137 'uploader': 'Projekt Melody',
1138 'description': 'md5:17eccca93a786d51bc67646756894066',
1139 'age_limit': 18,
1140 },
1141 },
1142 {
1143 'note': 'Non-Agegated non-embeddable video',
1144 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1145 'info_dict': {
1146 'id': 'MeJVWBSsPAY',
1147 'ext': 'mp4',
1148 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1149 'uploader': 'Herr Lurik',
1150 'uploader_id': 'st3in234',
1151 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1152 'upload_date': '20130730',
1153 },
1154 },
1155 {
1156 'note': 'Non-bypassable age-gated video',
1157 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1158 'only_matching': True,
1159 },
8bdd16b4 1160 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1161 # YouTube Red ad is not captured for creator
1162 {
1163 'url': '__2ABJjxzNo',
1164 'info_dict': {
1165 'id': '__2ABJjxzNo',
1166 'ext': 'mp4',
1167 'duration': 266,
1168 'upload_date': '20100430',
1169 'uploader_id': 'deadmau5',
1170 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1171 'creator': 'deadmau5',
1172 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1173 'uploader': 'deadmau5',
1174 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1175 'alt_title': 'Some Chords',
8bdd16b4 1176 },
1177 'expected_warnings': [
1178 'DASH manifest missing',
1179 ]
1180 },
067aa17e 1181 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1182 {
1183 'url': 'lqQg6PlCWgI',
1184 'info_dict': {
1185 'id': 'lqQg6PlCWgI',
1186 'ext': 'mp4',
556dbe7f 1187 'duration': 6085,
90227264 1188 'upload_date': '20150827',
cbe2bd91 1189 'uploader_id': 'olympic',
ec85ded8 1190 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1191 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1192 'uploader': 'Olympics',
cbe2bd91
PH
1193 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1194 },
1195 'params': {
1196 'skip_download': 'requires avconv',
e52a40ab 1197 }
cbe2bd91 1198 },
6271f1ca
PH
1199 # Non-square pixels
1200 {
1201 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1202 'info_dict': {
1203 'id': '_b-2C3KPAM0',
1204 'ext': 'mp4',
1205 'stretched_ratio': 16 / 9.,
556dbe7f 1206 'duration': 85,
6271f1ca
PH
1207 'upload_date': '20110310',
1208 'uploader_id': 'AllenMeow',
ec85ded8 1209 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1210 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1211 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1212 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1213 },
06b491eb
S
1214 },
1215 # url_encoded_fmt_stream_map is empty string
1216 {
1217 'url': 'qEJwOuvDf7I',
1218 'info_dict': {
1219 'id': 'qEJwOuvDf7I',
f57b7835 1220 'ext': 'webm',
06b491eb
S
1221 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1222 'description': '',
1223 'upload_date': '20150404',
1224 'uploader_id': 'spbelect',
1225 'uploader': 'Наблюдатели Петербурга',
1226 },
1227 'params': {
1228 'skip_download': 'requires avconv',
e323cf3f
S
1229 },
1230 'skip': 'This live event has ended.',
06b491eb 1231 },
067aa17e 1232 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1233 {
1234 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1235 'info_dict': {
1236 'id': 'FIl7x6_3R5Y',
eb6793ba 1237 'ext': 'webm',
da77d856
S
1238 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1239 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1240 'duration': 220,
da77d856
S
1241 'upload_date': '20150625',
1242 'uploader_id': 'dorappi2000',
ec85ded8 1243 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1244 'uploader': 'dorappi2000',
eb6793ba 1245 'formats': 'mincount:31',
da77d856 1246 },
eb6793ba 1247 'skip': 'not actual anymore',
2ee8f5d8 1248 },
8a1a26ce
YCH
1249 # DASH manifest with segment_list
1250 {
1251 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1252 'md5': '8ce563a1d667b599d21064e982ab9e31',
1253 'info_dict': {
1254 'id': 'CsmdDsKjzN8',
1255 'ext': 'mp4',
17ee98e1 1256 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1257 'uploader': 'Airtek',
1258 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1259 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1260 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1261 },
1262 'params': {
1263 'youtube_include_dash_manifest': True,
1264 'format': '135', # bestvideo
be49068d
S
1265 },
1266 'skip': 'This live event has ended.',
2ee8f5d8 1267 },
cf7e015f
S
1268 {
1269 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1270 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1271 'info_dict': {
545cc85d 1272 'id': 'jvGDaLqkpTg',
1273 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1274 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1275 },
1276 'playlist': [{
1277 'info_dict': {
545cc85d 1278 'id': 'jvGDaLqkpTg',
cf7e015f 1279 'ext': 'mp4',
545cc85d 1280 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1281 'description': 'md5:e03b909557865076822aa169218d6a5d',
1282 'duration': 10643,
1283 'upload_date': '20161111',
1284 'uploader': 'Team PGP',
1285 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1286 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1287 },
1288 }, {
1289 'info_dict': {
545cc85d 1290 'id': '3AKt1R1aDnw',
cf7e015f 1291 'ext': 'mp4',
545cc85d 1292 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1293 'description': 'md5:e03b909557865076822aa169218d6a5d',
1294 'duration': 10991,
1295 'upload_date': '20161111',
1296 'uploader': 'Team PGP',
1297 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1299 },
1300 }, {
1301 'info_dict': {
545cc85d 1302 'id': 'RtAMM00gpVc',
cf7e015f 1303 'ext': 'mp4',
545cc85d 1304 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1305 'description': 'md5:e03b909557865076822aa169218d6a5d',
1306 'duration': 10995,
1307 'upload_date': '20161111',
1308 'uploader': 'Team PGP',
1309 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1310 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1311 },
1312 }, {
1313 'info_dict': {
545cc85d 1314 'id': '6N2fdlP3C5U',
cf7e015f 1315 'ext': 'mp4',
545cc85d 1316 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1317 'description': 'md5:e03b909557865076822aa169218d6a5d',
1318 'duration': 10990,
1319 'upload_date': '20161111',
1320 'uploader': 'Team PGP',
1321 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1322 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1323 },
1324 }],
1325 'params': {
1326 'skip_download': True,
1327 },
65c2fde2 1328 'skip': 'Not multifeed anymore',
cbaed4bb 1329 },
f9f49d87 1330 {
067aa17e 1331 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1332 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1333 'info_dict': {
1334 'id': 'gVfLd0zydlo',
1335 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1336 },
1337 'playlist_count': 2,
be49068d 1338 'skip': 'Not multifeed anymore',
f9f49d87 1339 },
cbaed4bb 1340 {
2d3d2997 1341 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1342 'only_matching': True,
0e49d9a6 1343 },
6d4fc66b 1344 {
2d3d2997 1345 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1346 'only_matching': True,
1347 },
0e49d9a6 1348 {
067aa17e 1349 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1350 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1351 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1352 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1353 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1354 'info_dict': {
1355 'id': 'lsguqyKfVQg',
1356 'ext': 'mp4',
1357 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1358 'alt_title': 'Dark Walk',
0e49d9a6 1359 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1360 'duration': 133,
0e49d9a6
LL
1361 'upload_date': '20151119',
1362 'uploader_id': 'IronSoulElf',
ec85ded8 1363 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1364 'uploader': 'IronSoulElf',
11f9be09 1365 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1366 'track': 'Dark Walk',
1367 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1368 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1369 },
1370 'params': {
1371 'skip_download': True,
1372 },
1373 },
61f92af1 1374 {
067aa17e 1375 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1376 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1377 'only_matching': True,
1378 },
313dfc45
LL
1379 {
1380 # Video with yt:stretch=17:0
1381 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1382 'info_dict': {
1383 'id': 'Q39EVAstoRM',
1384 'ext': 'mp4',
1385 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1386 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1387 'upload_date': '20151107',
1388 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1389 'uploader': 'CH GAMER DROID',
1390 },
1391 'params': {
1392 'skip_download': True,
1393 },
be49068d 1394 'skip': 'This video does not exist.',
313dfc45 1395 },
201c1459 1396 {
1397 # Video with incomplete 'yt:stretch=16:'
1398 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1399 'only_matching': True,
1400 },
7caf9830
S
1401 {
1402 # Video licensed under Creative Commons
1403 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1404 'info_dict': {
1405 'id': 'M4gD1WSo5mA',
1406 'ext': 'mp4',
1407 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1408 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1409 'duration': 721,
7caf9830
S
1410 'upload_date': '20150127',
1411 'uploader_id': 'BerkmanCenter',
ec85ded8 1412 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1413 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1414 'license': 'Creative Commons Attribution license (reuse allowed)',
1415 },
1416 'params': {
1417 'skip_download': True,
1418 },
1419 },
fd050249
S
1420 {
1421 # Channel-like uploader_url
1422 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1423 'info_dict': {
1424 'id': 'eQcmzGIKrzg',
1425 'ext': 'mp4',
1426 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1427 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1428 'duration': 4060,
fd050249 1429 'upload_date': '20151119',
eb6793ba 1430 'uploader': 'Bernie Sanders',
fd050249 1431 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1433 'license': 'Creative Commons Attribution license (reuse allowed)',
1434 },
1435 'params': {
1436 'skip_download': True,
1437 },
1438 },
040ac686
S
1439 {
1440 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1441 'only_matching': True,
7f29cf54
S
1442 },
1443 {
067aa17e 1444 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1445 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1446 'only_matching': True,
6496ccb4
S
1447 },
1448 {
1449 # Rental video preview
1450 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1451 'info_dict': {
1452 'id': 'uGpuVWrhIzE',
1453 'ext': 'mp4',
1454 'title': 'Piku - Trailer',
1455 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1456 'upload_date': '20150811',
1457 'uploader': 'FlixMatrix',
1458 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1459 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1460 'license': 'Standard YouTube License',
1461 },
1462 'params': {
1463 'skip_download': True,
1464 },
eb6793ba 1465 'skip': 'This video is not available.',
022a5d66 1466 },
12afdc2a
S
1467 {
1468 # YouTube Red video with episode data
1469 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1470 'info_dict': {
1471 'id': 'iqKdEhx-dD4',
1472 'ext': 'mp4',
1473 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1474 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1475 'duration': 2085,
12afdc2a
S
1476 'upload_date': '20170118',
1477 'uploader': 'Vsauce',
1478 'uploader_id': 'Vsauce',
1479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1480 'series': 'Mind Field',
1481 'season_number': 1,
1482 'episode_number': 1,
1483 },
1484 'params': {
1485 'skip_download': True,
1486 },
1487 'expected_warnings': [
1488 'Skipping DASH manifest',
1489 ],
1490 },
c7121fa7
S
1491 {
1492 # The following content has been identified by the YouTube community
1493 # as inappropriate or offensive to some audiences.
1494 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1495 'info_dict': {
1496 'id': '6SJNVb0GnPI',
1497 'ext': 'mp4',
1498 'title': 'Race Differences in Intelligence',
1499 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1500 'duration': 965,
1501 'upload_date': '20140124',
1502 'uploader': 'New Century Foundation',
1503 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1504 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1505 },
1506 'params': {
1507 'skip_download': True,
1508 },
545cc85d 1509 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1510 },
022a5d66
S
1511 {
1512 # itag 212
1513 'url': '1t24XAntNCY',
1514 'only_matching': True,
fd5c4aab
S
1515 },
1516 {
1517 # geo restricted to JP
1518 'url': 'sJL6WA-aGkQ',
1519 'only_matching': True,
1520 },
cd5a74a2
S
1521 {
1522 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1523 'only_matching': True,
1524 },
bc2ca1bb 1525 {
1526 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1527 'only_matching': True,
1528 },
1529 {
1530 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1531 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1532 'only_matching': True,
1533 },
825cd268
RA
1534 {
1535 # DRM protected
1536 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1537 'only_matching': True,
4fe54c12
S
1538 },
1539 {
1540 # Video with unsupported adaptive stream type formats
1541 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1542 'info_dict': {
1543 'id': 'Z4Vy8R84T1U',
1544 'ext': 'mp4',
1545 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1546 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1547 'duration': 433,
1548 'upload_date': '20130923',
1549 'uploader': 'Amelia Putri Harwita',
1550 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1551 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1552 'formats': 'maxcount:10',
1553 },
1554 'params': {
1555 'skip_download': True,
1556 'youtube_include_dash_manifest': False,
1557 },
5429d6a9 1558 'skip': 'not actual anymore',
5caabd3c 1559 },
1560 {
822b9d9c 1561 # Youtube Music Auto-generated description
5caabd3c 1562 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1563 'info_dict': {
1564 'id': 'MgNrAu2pzNs',
1565 'ext': 'mp4',
1566 'title': 'Voyeur Girl',
1567 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1568 'upload_date': '20190312',
5429d6a9
S
1569 'uploader': 'Stephen - Topic',
1570 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1571 'artist': 'Stephen',
1572 'track': 'Voyeur Girl',
1573 'album': 'it\'s too much love to know my dear',
1574 'release_date': '20190313',
1575 'release_year': 2019,
1576 },
1577 'params': {
1578 'skip_download': True,
1579 },
1580 },
66b48727
RA
1581 {
1582 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1583 'only_matching': True,
1584 },
011e75e6
S
1585 {
1586 # invalid -> valid video id redirection
1587 'url': 'DJztXj2GPfl',
1588 'info_dict': {
1589 'id': 'DJztXj2GPfk',
1590 'ext': 'mp4',
1591 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1592 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1593 'upload_date': '20090125',
1594 'uploader': 'Prochorowka',
1595 'uploader_id': 'Prochorowka',
1596 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1597 'artist': 'Panjabi MC',
1598 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1599 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1600 },
1601 'params': {
1602 'skip_download': True,
1603 },
545cc85d 1604 'skip': 'Video unavailable',
ea74e00b
DP
1605 },
1606 {
1607 # empty description results in an empty string
1608 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1609 'info_dict': {
1610 'id': 'x41yOUIvK2k',
1611 'ext': 'mp4',
1612 'title': 'IMG 3456',
1613 'description': '',
1614 'upload_date': '20170613',
1615 'uploader_id': 'ElevageOrVert',
1616 'uploader': 'ElevageOrVert',
1617 },
1618 'params': {
1619 'skip_download': True,
1620 },
1621 },
a0566bbf 1622 {
29f7c58a 1623 # with '};' inside yt initial data (see [1])
1624 # see [2] for an example with '};' inside ytInitialPlayerResponse
1625 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1626 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1627 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1628 'info_dict': {
1629 'id': 'CHqg6qOn4no',
1630 'ext': 'mp4',
1631 'title': 'Part 77 Sort a list of simple types in c#',
1632 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1633 'upload_date': '20130831',
1634 'uploader_id': 'kudvenkat',
1635 'uploader': 'kudvenkat',
1636 },
1637 'params': {
1638 'skip_download': True,
1639 },
1640 },
29f7c58a 1641 {
1642 # another example of '};' in ytInitialData
1643 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1644 'only_matching': True,
1645 },
1646 {
1647 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1648 'only_matching': True,
1649 },
545cc85d 1650 {
cc2db878 1651 # https://github.com/ytdl-org/youtube-dl/pull/28094
1652 'url': 'OtqTfy26tG0',
1653 'info_dict': {
1654 'id': 'OtqTfy26tG0',
1655 'ext': 'mp4',
1656 'title': 'Burn Out',
1657 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1658 'upload_date': '20141120',
1659 'uploader': 'The Cinematic Orchestra - Topic',
1660 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1661 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1662 'artist': 'The Cinematic Orchestra',
1663 'track': 'Burn Out',
1664 'album': 'Every Day',
1665 'release_data': None,
1666 'release_year': None,
1667 },
1668 'params': {
1669 'skip_download': True,
1670 },
545cc85d 1671 },
bc2ca1bb 1672 {
1673 # controversial video, only works with bpctr when authenticated with cookies
1674 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1675 'only_matching': True,
1676 },
a1a7907b 1677 {
1678 # controversial video, requires bpctr/contentCheckOk
1679 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1680 'info_dict': {
1681 'id': 'SZJvDhaSDnc',
1682 'ext': 'mp4',
1683 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1684 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1685 'uploader': 'CBS This Morning',
11f9be09 1686 'uploader_id': 'CBSThisMorning',
a1a7907b 1687 'upload_date': '20140716',
1688 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1689 }
1690 },
f7ad7160 1691 {
1692 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1693 'url': 'cBvYw8_A0vQ',
1694 'info_dict': {
1695 'id': 'cBvYw8_A0vQ',
1696 'ext': 'mp4',
1697 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1698 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1699 'upload_date': '20201120',
1700 'uploader': 'Walk around Japan',
1701 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1702 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1703 },
1704 'params': {
1705 'skip_download': True,
1706 },
0fb983f6 1707 }, {
1708 # Has multiple audio streams
1709 'url': 'WaOKSUlf4TM',
1710 'only_matching': True
9297939e 1711 }, {
1712 # Requires Premium: has format 141 when requested using YTM url
1713 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1714 'only_matching': True
1715 }, {
120916da 1716 # multiple subtitles with same lang_code
1717 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1718 'only_matching': True,
109dd3b2 1719 }, {
1720 # Force use android client fallback
1721 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1722 'info_dict': {
1723 'id': 'YOelRv7fMxY',
11f9be09 1724 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1725 'ext': '3gp',
1726 'upload_date': '20210624',
1727 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1728 'uploader': 'colinfurze',
11f9be09 1729 'uploader_id': 'colinfurze',
109dd3b2 1730 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1731 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1732 },
1733 'params': {
1734 'format': '17', # 3gp format available on android
1735 'extractor_args': {'youtube': {'player_client': ['android']}},
1736 },
120916da 1737 },
109dd3b2 1738 {
1739 # Skip download of additional client configs (remix client config in this case)
1740 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1741 'only_matching': True,
1742 'params': {
1743 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1744 },
8fc54b12 1745 }, {
1746 # shorts
1747 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1748 'only_matching': True,
9222c381 1749 }, {
1750 'note': 'Storyboards',
1751 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1752 'info_dict': {
1753 'id': '5KLPxDtMqe8',
1754 'ext': 'mhtml',
1755 'format_id': 'sb0',
1756 'title': 'Your Brain is Plastic',
1757 'uploader_id': 'scishow',
1758 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1759 'upload_date': '20140324',
1760 'uploader': 'SciShow',
1761 }, 'params': {'format': 'mhtml', 'skip_download': True}
1762 }
2eb88d95
PH
1763 ]
1764
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle `url`.

    A URL whose query string carries a non-empty `list` parameter is
    rejected outright; every other URL is judged by the parent class.
    """
    # Imported inside the method, exactly as the original does
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1773
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the two empty caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _player_cache: used by the signature / nsig decryption helpers
    # _code_cache: player JS source keyed by player id (see _load_player)
    self._player_cache, self._code_cache = {}, {}
e0df6211 1778
adbc4ec4
THD
1779 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
1780 EXPIRATION_DURATION = 18_000
1781 lock = threading.Lock()
1782
1783 is_live = True
1784 expiration_time = time.time() + EXPIRATION_DURATION
1785 formats = [f for f in formats if f.get('is_from_start')]
1786
1787 def refetch_manifest(format_id):
1788 nonlocal formats, expiration_time, is_live
1789 if time.time() <= expiration_time:
1790 return
1791
1792 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
1793 video_details = traverse_obj(
1794 prs, (..., 'videoDetails'), expected_type=dict, default=[])
1795 microformats = traverse_obj(
1796 prs, (..., 'microformat', 'playerMicroformatRenderer'),
1797 expected_type=dict, default=[])
1798 _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
1799 expiration_time = time.time() + EXPIRATION_DURATION
1800
1801 def mpd_feed(format_id):
1802 """
1803 @returns (manifest_url, manifest_stream_number, is_live) or None
1804 """
1805 with lock:
1806 refetch_manifest(format_id)
1807
1808 f = next((f for f in formats if f['format_id'] == format_id), None)
1809 if not f:
1810 self.report_warning(
1811 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
1812 return None
1813 return f['manifest_url'], f['manifest_stream_number'], is_live
1814
1815 for f in formats:
1816 f['protocol'] = 'http_dash_segments_generator'
1817 f['fragments'] = functools.partial(
1818 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
1819
1820 def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
1821 FETCH_SPAN, MAX_DURATION = 5, 432000
1822
1823 mpd_url, stream_number, is_live = None, None, True
1824
1825 begin_index = 0
1826 download_start_time = ctx.get('start') or time.time()
1827
1828 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
1829 if lack_early_segments:
1830 self.report_warning(bug_reports_message(
1831 'Starting download from the last 120 hours of the live stream since '
1832 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
1833 lack_early_segments = True
1834
1835 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
1836 fragments, fragment_base_url = None, None
1837
1838 def _extract_sequence_from_mpd(refresh_sequence):
1839 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
1840 # Obtain from MPD's maximum seq value
1841 old_mpd_url = mpd_url
1842 mpd_url, stream_number, is_live = mpd_feed(format_id) or (mpd_url, stream_number, False)
1843 if old_mpd_url == mpd_url and not refresh_sequence:
1844 return True, last_seq
1845 try:
1846 fmts, _ = self._extract_mpd_formats_and_subtitles(
1847 mpd_url, None, note=False, errnote=False, fatal=False)
1848 except ExtractorError:
1849 fmts = None
1850 if not fmts:
1851 no_fragment_score += 1
1852 return False, last_seq
1853 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
1854 fragments = fmt_info['fragments']
1855 fragment_base_url = fmt_info['fragment_base_url']
1856 assert fragment_base_url
1857
1858 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
1859 return True, _last_seq
1860
1861 while is_live:
1862 fetch_time = time.time()
1863 if no_fragment_score > 30:
1864 return
1865 if last_segment_url:
1866 # Obtain from "X-Head-Seqnum" header value from each segment
1867 try:
1868 urlh = self._request_webpage(
1869 last_segment_url, None, note=False, errnote=False, fatal=False)
1870 except ExtractorError:
1871 urlh = None
1872 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
1873 if last_seq is None:
1874 no_fragment_score += 1
1875 last_segment_url = None
1876 continue
1877 else:
1878 should_retry, last_seq = _extract_sequence_from_mpd(True)
1879 if not should_retry:
1880 continue
1881
1882 if known_idx > last_seq:
1883 last_segment_url = None
1884 continue
1885
1886 last_seq += 1
1887
1888 if begin_index < 0 and known_idx < 0:
1889 # skip from the start when it's negative value
1890 known_idx = last_seq + begin_index
1891 if lack_early_segments:
1892 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
1893 try:
1894 for idx in range(known_idx, last_seq):
1895 # do not update sequence here or you'll get skipped some part of it
1896 should_retry, _ = _extract_sequence_from_mpd(False)
1897 if not should_retry:
1898 # retry when it gets weird state
1899 known_idx = idx - 1
1900 raise ExtractorError('breaking out of outer loop')
1901 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
1902 yield {
1903 'url': last_segment_url,
1904 }
1905 if known_idx == last_seq:
1906 no_fragment_score += 5
1907 else:
1908 no_fragment_score = 0
1909 known_idx = last_seq
1910 except ExtractorError:
1911 continue
1912
1913 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
1914
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfgs, if any.

    Looks for 'PLAYER_JS_URL' or a 'jsUrl' under
    'WEB_PLAYER_CONTEXT_CONFIGS' in each ytcfg. Protocol-relative and
    site-relative values are resolved against https://www.youtube.com.
    Returns None when nothing is found. `webpage` is accepted but unused.
    """
    candidate = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if not candidate:
        return None
    if candidate.startswith('//'):
        return 'https:' + candidate
    if re.match(r'https?://', candidate):
        return candidate
    return compat_urlparse.urljoin('https://www.youtube.com', candidate)
1927
b6de707d 1928 def _download_player_url(self, video_id, fatal=False):
1929 res = self._download_webpage(
1930 'https://www.youtube.com/iframe_api',
1931 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1932 if res:
1933 player_version = self._search_regex(
1934 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1935 if player_version:
1936 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1937
60064c53
PH
1938 def _signature_cache_id(self, example_sig):
1939 """ Return a string representation of a signature """
78caa52a 1940 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 1941
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the `id` group of the first _PLAYER_INFO_RE pattern matching `player_url`.

    Raises ExtractorError when none of the patterns match.
    """
    # Lazy: patterns are tried in order and evaluation stops at the first hit
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_m = next((found for found in attempts if found), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c 1951
404f611f 1952 def _load_player(self, video_id, player_url, fatal=True):
109dd3b2 1953 player_id = self._extract_player_info(player_url)
1954 if player_id not in self._code_cache:
1276a43a 1955 code = self._download_webpage(
109dd3b2 1956 player_url, video_id, fatal=fatal,
1957 note='Downloading player ' + player_id,
1958 errnote='Download of %s failed' % player_url)
1276a43a 1959 if code:
1960 self._code_cache[player_id] = code
404f611f 1961 return self._code_cache.get(player_id)
109dd3b2 1962
e40c758c 1963 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1964 player_id = self._extract_player_info(player_url)
e0df6211 1965
c4417ddb 1966 # Read from filesystem cache
545cc85d 1967 func_id = 'js_%s_%s' % (
1968 player_id, self._signature_cache_id(example_sig))
c4417ddb 1969 assert os.path.basename(func_id) == func_id
a0e07d31 1970
69ea8ca4 1971 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1972 if cache_spec is not None:
78caa52a 1973 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1974
404f611f 1975 code = self._load_player(video_id, player_url)
1976 if code:
109dd3b2 1977 res = self._parse_sig_js(code)
e0df6211 1978
109dd3b2 1979 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1980 cache_res = res(test_string)
1981 cache_spec = [ord(c) for c in cache_res]
83799698 1982
109dd3b2 1983 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1984 return res
83799698 1985
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to `func`, if requested.

    Does nothing unless the 'youtube_print_sig_code' param is set. `func`
    is run on a probe string to learn where each input index ends up; the
    resulting index list is rendered as a sum of `s[...]` index and slice
    expressions and shown with to_screen.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def _slice_expr(first, last, stride):
        lower = str(first) if first != 0 else ''
        upper = (':%d' % (last + stride)) if last + stride >= 0 else ':'
        jump = (':%d' % stride) if stride != 1 else ''
        return 's[%s%s%s]' % (lower, upper, jump)

    def gen_sig_code(idxs):
        stride = None
        # Placeholder only; always reassigned when a run (stride) begins
        run_start = '(Never used)'
        for cur, prev in zip(idxs[1:], idxs[:-1]):
            delta = cur - prev
            if stride is not None:
                # Inside a run: keep going while the step stays the same,
                # otherwise emit the finished run as a slice.
                if delta != stride:
                    yield _slice_expr(run_start, prev, stride)
                    stride = None
                continue
            if delta in [-1, 1]:
                stride, run_start = delta, prev
            else:
                yield 's[%d]' % prev
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield _slice_expr(run_start, cur, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % (part_lengths)
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2027
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it.

    The function name is the `sig` group of the first matching pattern
    below. The returned callable takes a single string and passes it to the
    extracted JS function as its only argument.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        return initial_function([s])

    return run_signature_function
2051
545cc85d 2052 def _decrypt_signature(self, s, video_id, player_url):
257a2501 2053 """Turn the encrypted s field into a working signature"""
6b37f0be 2054
c8bf86d5 2055 if player_url is None:
69ea8ca4 2056 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 2057
c8bf86d5 2058 try:
62af3a0e 2059 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
2060 if player_id not in self._player_cache:
2061 func = self._extract_signature_function(
60064c53 2062 video_id, player_url, s
c8bf86d5
PH
2063 )
2064 self._player_cache[player_id] = func
2065 func = self._player_cache[player_id]
404f611f 2066 self._print_sig_code(func, s)
c8bf86d5
PH
2067 return func(s)
2068 except Exception as e:
404f611f 2069 raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
2070
2071 def _decrypt_nsig(self, s, video_id, player_url):
2072 """Turn the encrypted n field into a working signature"""
2073 if player_url is None:
2074 raise ExtractorError('Cannot decrypt nsig without player_url')
2075 if player_url.startswith('//'):
2076 player_url = 'https:' + player_url
2077 elif not re.match(r'https?://', player_url):
2078 player_url = compat_urlparse.urljoin(
2079 'https://www.youtube.com', player_url)
2080
2081 sig_id = ('nsig_value', s)
2082 if sig_id in self._player_cache:
2083 return self._player_cache[sig_id]
2084
2085 try:
2086 player_id = ('nsig', player_url)
2087 if player_id not in self._player_cache:
2088 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
2089 func = self._player_cache[player_id]
2090 self._player_cache[sig_id] = func(s)
2091 self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
2092 return self._player_cache[sig_id]
2093 except Exception as e:
aa9369a2 2094 raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
404f611f 2095
2096 def _extract_n_function_name(self, jscode):
2097 return self._search_regex(
2098 (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
2099 jscode, 'Initial JS player n function name', group='nfunc')
2100
def _extract_n_function(self, video_id, player_url):
    """Return a callable that runs the player's n-function on one string.

    The function code is taken from the 'youtube-nsig' cache when present;
    otherwise it is extracted from the player JS and stored there. When the
    'youtube_print_sig_code' param is set, the code is also printed.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        # The JS function is rebuilt from its code on every call
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function
e0df6211 2118
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' entry of `ytcfg` is used when it is a truthy integer;
    otherwise the value is searched for in the player JS. Without a
    `player_url` this raises ExtractorError if `fatal`, else warns and
    returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2142
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL so the video counts as watched.

    The URL comes from 'playbackTracking' -> 'videostatsPlaybackUrl' ->
    'baseUrl' in the player responses; `ver` and a random `cpn` are added
    to its query. Failures only produce a warning.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]
    cpn = ''.join(cpn_chars)

    qs['ver'] = ['2']
    qs['cpn'] = [cpn]
    tracking_query = compat_urllib_parse_urlencode(qs, True)
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=tracking_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2168
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embeds found in `webpage`, in three passes.

    1. Quoted YouTube embed/v/p URLs following an <iframe>/<embed> src, a
       data-video-url attribute, an <object> data attribute, or an
       embedSWF( / new SWFObject( call (HTML-unescaped).
    2. lazyYT `data-youtube-id` attribute values (HTML-unescaped).
    3. `data-video_id` values of the Wordpress "YouTube Video Importer"
       plugin's player <div>.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read `embedSWF\(?:\s*`, a
    # malformed group that demanded a literal ':' and therefore never
    # matched a real `embedSWF("...")` call.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields (q1, q2, id) tuples; only the id is wanted
    entries.extend(m[-1] for m in matches)

    return entries
2200
@staticmethod
def _extract_url(webpage):
    """Return the first entry of `_extract_urls(webpage)`, or None if there is none."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)
2205
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the `id` group of `_VALID_URL` matched against `url`.

    Raises ExtractorError when the URL does not match.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af 2212
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in `data`.

    Start times are read from `timeRangeStartMillis` (scaled to seconds)
    and titles from `title.simpleText` of each `chapterRenderer`.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)
2227
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists in the engagement panels.

    Each panel's marker list is tried in order; the first one that produces
    a non-empty chapter list wins. Returns [] when none does.
    """
    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2243
2244 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2245 chapters = []
7c365c21 2246 last_chapter = {'start_time': 0}
2247 for idx, chapter in enumerate(chapter_list or []):
2248 title = chapter_title(chapter)
84213ea8
S
2249 start_time = chapter_time(chapter)
2250 if start_time is None:
2251 continue
7c365c21 2252 last_chapter['end_time'] = start_time
2253 if start_time < last_chapter['start_time']:
2254 if idx == 1:
2255 chapters.pop()
2256 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2257 else:
2258 self.report_warning(f'Invalid start time for chapter "{title}"')
2259 continue
2260 last_chapter = {'start_time': start_time, 'title': title}
2261 chapters.append(last_chapter)
2262 last_chapter['end_time'] = duration
84213ea8
S
2263 return chapters
2264
545cc85d 2265 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2266 return self._parse_json(self._search_regex(
2267 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2268 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 2269
a1c5d2ca
M
2270 def _extract_comment(self, comment_renderer, parent=None):
2271 comment_id = comment_renderer.get('commentId')
2272 if not comment_id:
2273 return
fe93e2c4 2274
052e1350 2275 text = self._get_text(comment_renderer, 'contentText')
fe93e2c4 2276
49bd8c66 2277 # note: timestamp is an estimate calculated from the current time and time_text
f3aa3c3f 2278 timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
052e1350 2279 author = self._get_text(comment_renderer, 'authorText')
a1c5d2ca
M
2280 author_id = try_get(comment_renderer,
2281 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
fe93e2c4 2282
49bd8c66 2283 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2284 lambda x: x['likeCount']), compat_str)) or 0
a1c5d2ca
M
2285 author_thumbnail = try_get(comment_renderer,
2286 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2287
2288 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
97524332 2289 is_favorited = 'creatorHeart' in (try_get(
2290 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
a1c5d2ca
M
2291 return {
2292 'id': comment_id,
2293 'text': text,
d92f5d5a 2294 'timestamp': timestamp,
a1c5d2ca
M
2295 'time_text': time_text,
2296 'like_count': votes,
97524332 2297 'is_favorited': is_favorited,
a1c5d2ca
M
2298 'author': author,
2299 'author_id': author_id,
2300 'author_thumbnail': author_thumbnail,
2301 'author_is_uploader': author_is_uploader,
2302 'parent': parent or 'root'
2303 }
2304
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following comment continuations page by page.

    Without `parent` the top-level comment section is walked. For each thread
    that carries a replies renderer this method calls itself with `parent` set
    to the id of the comment, passing along the same `tracker` dict so the
    counters are shared between the outer and the nested generators.
    """

    def config_arg(name):
        # First value of the extractor argument, '' when nothing was configured
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        # Looks for the continuation of the requested sort order in the header
        found = None
        for content in contents:
            header = traverse_obj(content, 'commentsHeaderRenderer')
            estimate = parse_count(self._get_text(
                header, 'countText', 'commentsCount', max_runs=1))
            if estimate:
                tracker['est_total'] = estimate
                self.to_screen(f'Downloading ~{estimate} comments')

            sort_index = int(config_arg('comment_sort') != 'top')  # 1 = new, 0 = top
            menu_item = try_get(
                header,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][sort_index], dict) or {}
            endpoint = menu_item.get('serviceEndpoint') or {}

            found = self._extract_continuation_ep_data(endpoint) or self._extract_continuation(menu_item)
            if found:
                label = str_or_none(menu_item.get('title')) or (
                    'top comments' if sort_index == 0 else 'newest first')
                self.to_screen('Sorting comments by %s' % label.lower())
                break
        return found

    def extract_thread(contents):
        top_level = not parent
        counter_key = 'total_reply_comments' if parent else 'total_parent_comments'
        if top_level:
            tracker['current_page_thread'] = 0
        for content in contents:
            if top_level and tracker['total_parent_comments'] >= max_parents:
                # A bare yield hands None to the page loop below, which stops on a falsy entry
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            replies = self._comment_entries(
                replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            reply_budget = max(0, max_replies - tracker['total_reply_comments'])
            yield from itertools.islice(replies, min(max_replies_per_thread, reply_budget))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing or unparsable limits fall back to sys.maxsize
    raw_limits = self._configuration_arg('max_comments', ) + [''] * 4
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(value, default=sys.maxsize) for value in raw_limits)

    continuation = self._extract_continuation(root_continuation_data)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    response = None
    is_first_continuation = parent is None

    page_num = 0
    while continuation:
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints')

        sections = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for section in sections:
            items = traverse_obj(
                section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The very first response is only used for the header (count and sort order)
                continuation = extract_header(items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': items})
            if continuation:
                break

        page_num += 1
a1c5d2ca 2441
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Return a lazy iterator over the comments found through `contents`.

    The iterator is capped with the first value of the `max_comments`
    extractor argument (no cap when that value does not parse as an int).
    """
    def iter_comments(data):
        # Pick the first item section that is marked as the comment section
        section = None
        for candidate in traverse_obj(data, (..., 'itemSectionRenderer'), default={}):
            if candidate.get('sectionIdentifier') == 'comment-item-section':
                section = candidate
                break
        yield from self._comment_entries(section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(contents), 0, limit)
a1c5d2ca 2452
109dd3b2 2453 @staticmethod
99e9e001 2454 def _get_checkok_params():
2455 return {'contentCheckOk': True, 'racyCheckOk': True}
2456
2457 @classmethod
2458 def _generate_player_context(cls, sts=None):
109dd3b2 2459 context = {
2460 'html5Preference': 'HTML5_PREF_WANTS',
2461 }
2462 if sts is not None:
2463 context['signatureTimestamp'] = sts
2464 return {
2465 'playbackContext': {
2466 'contentPlaybackContext': context
a1a7907b 2467 },
99e9e001 2468 **cls._get_checkok_params()
109dd3b2 2469 }
2470
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """Tell whether the playability status of `player_response` indicates an age gate.

    Returns True when `playabilityStatus` has a truthy `desktopLegacyAgeGateReason`,
    or when its `status` or `reason` contains one of the known markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lowercase while status values are written in uppercase
    # (compare 'UNPLAYABLE'), so the comparison has to be case-insensitive.
    # Non-string values are ignored instead of raising TypeError.
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)
2482
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of `player_response` is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 2486
99e9e001 2487 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
109dd3b2 2488
11f9be09 2489 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2490 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
b6de707d 2491 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
11f9be09 2492 headers = self.generate_api_headers(
99e9e001 2493 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
9297939e 2494
11f9be09 2495 yt_query = {'videoId': video_id}
2496 yt_query.update(self._generate_player_context(sts))
2497 return self._extract_response(
2498 item_id=video_id, ep='player', query=yt_query,
379e44ed 2499 ytcfg=player_ytcfg, headers=headers, fatal=True,
000c15a4 2500 default_client=client,
11f9be09 2501 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2502 ) or None
2503
def _get_requested_clients(self, url, smuggled_data):
    """Return the de-duplicated list of player clients to query.

    The `player_client` extractor argument is read; besides client names it
    accepts `default` and `all`. Unsupported names are skipped with a warning
    and an empty selection falls back to the default clients. For music URLs
    the existing `<client>_music` variants of the selection are appended.
    """
    default = ['android', 'web']
    # Clients whose name starts with an underscore cannot be requested
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if not client.startswith('_')),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending the list from a generator that
        # iterates over the very same list would mutate it during iteration
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 2527
c0bc527b
M
2528 def _extract_player_ytcfg(self, client, video_id):
2529 url = {
2530 'web_music': 'https://music.youtube.com',
2531 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2532 }.get(client)
2533 if not url:
2534 return {}
2535 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2536 return self.extract_ytcfg(video_id, webpage) or {}
2537
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect the player responses of all `clients`.

    Returns a tuple `(player_responses, player_url)`. Clients are processed in
    the given order; age-gate/creator fallback clients may be queued while
    iterating. An ExtractorError raised for a client is remembered: it is
    re-raised at the end when no response was collected at all, otherwise it
    is only reported as a warning.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    # Reversed copy used as a stack: pop() takes the next client,
    # append() schedules a fallback client to run right after the current one
    pending = clients[::-1]
    prs = []

    def append_client(client_name):
        if client_name in INNERTUBE_CLIENTS and client_name not in clients:
            pending.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        prs.append({**initial_pr, 'streamingData': None})

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client = pending.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                # The webpage already contained the response of the web client
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 2604
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Generate format dicts from a list of `streamingData` dicts.

    Direct (https) formats are yielded first, then the formats of the HLS and
    DASH manifests, unless those are skipped through the `skip` extractor
    argument or the `youtube_include_hls_manifest` parameter. Manifest formats
    whose itag was already seen for the same protocol are dropped.

    `player_url` is needed to decrypt signatures; formats that require a
    signature are skipped when it is not available.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # quality stays None when the format names neither a quality
                # nor an audio quality; do not call .replace() on None then
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Work on a copy so that appending 'hls' below cannot leak into whatever
    # list _configuration_arg handed out
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        # Returns False for a duplicate; otherwise sets format_id and quality on f
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Reuse the quality seen for the same itag, else for the same height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
11f9be09 2757
def _extract_storyboard(self, player_responses, duration):
    """Generate one `mhtml` format dict per storyboard level.

    The storyboard spec is a `|`-separated string: the first field is the base
    URL, each following field describes one level as 8 `#`-separated values.
    Malformed levels are reported with a warning and skipped.

    Nothing is generated when there is no base URL, or when `duration` is None
    because the fragment durations cannot be computed without it.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # str.split never returns an empty list (''.split('|') == ['']), so the
    # absence of a spec shows up as an empty base URL, not as an empty list
    base_url = spec.pop()
    if not base_url or duration is None:
        return

    last_level = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        name, sigh = args[6:]

        url = base_url.replace('$L', str(last_level - i)).replace('$N', name) + f'&sigh={sigh}'
        # Every fragment is a sheet of cols * rows frames; the last one may be partial
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'path': url.replace('$M', str(j)),
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
2792
adbc4ec4 2793 def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
b6de707d 2794 webpage = None
2795 if 'webpage' not in self._configuration_arg('player_skip'):
2796 webpage = self._download_webpage(
2797 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
11f9be09 2798
2799 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
11f9be09 2800
b6de707d 2801 player_responses, player_url = self._extract_player_responses(
11f9be09 2802 self._get_requested_clients(url, smuggled_data),
99e9e001 2803 video_id, webpage, master_ytcfg)
11f9be09 2804
adbc4ec4
THD
2805 return webpage, master_ytcfg, player_responses, player_url
2806
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
    """Determine the live state and extract all formats.

    Returns the tuple `(live_broadcast_details, is_live, streaming_data, formats)`.
    `is_live` comes from the video details and falls back to `isLiveNow` of the
    live broadcast details when the video details do not provide it.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The format generator is exhausted here so that callers receive a list
    formats = [*self._extract_formats(streaming_data, video_id, player_url, is_live)]

    return live_broadcast_details, is_live, streaming_data, formats
2817
2818 def _real_extract(self, url):
2819 url, smuggled_data = unsmuggle_url(url, {})
2820 video_id = self._match_id(url)
2821
2822 base_url = self.http_scheme() + '//www.youtube.com/'
2823 webpage_url = base_url + 'watch?v=' + video_id
2824
2825 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2826
11f9be09 2827 playability_statuses = traverse_obj(
2828 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2829
2830 trailer_video_id = get_first(
2831 playability_statuses,
2832 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2833 expected_type=str)
2834 if trailer_video_id:
2835 return self.url_result(
2836 trailer_video_id, self.ie_key(), trailer_video_id)
2837
2838 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2839 if webpage else (lambda x: None))
2840
2841 video_details = traverse_obj(
2842 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2843 microformats = traverse_obj(
2844 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2845 expected_type=dict, default=[])
2846 video_title = (
2847 get_first(video_details, 'title')
2848 or self._get_text(microformats, (..., 'title'))
2849 or search_meta(['og:title', 'twitter:title', 'title']))
2850 video_description = get_first(video_details, 'shortDescription')
2851
d89257f3 2852 multifeed_metadata_list = get_first(
2853 player_responses,
2854 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2855 expected_type=str)
2856 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
2857 if self.get_param('noplaylist'):
11f9be09 2858 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 2859 else:
2860 entries = []
2861 feed_ids = []
2862 for feed in multifeed_metadata_list.split(','):
2863 # Unquote should take place before split on comma (,) since textual
2864 # fields may contain comma as well (see
2865 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2866 feed_data = compat_parse_qs(
2867 compat_urllib_parse_unquote_plus(feed))
2868
2869 def feed_entry(name):
2870 return try_get(
2871 feed_data, lambda x: x[name][0], compat_str)
2872
2873 feed_id = feed_entry('id')
2874 if not feed_id:
2875 continue
2876 feed_title = feed_entry('title')
2877 title = video_title
2878 if feed_title:
2879 title += ' (%s)' % feed_title
2880 entries.append({
2881 '_type': 'url_transparent',
2882 'ie_key': 'Youtube',
2883 'url': smuggle_url(
2884 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2885 {'force_singlefeed': True}),
2886 'title': title,
2887 })
2888 feed_ids.append(feed_id)
2889 self.to_screen(
2890 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2891 % (', '.join(feed_ids), video_id))
2892 return self.playlist_result(
2893 entries, video_id, video_title, video_description)
11f9be09 2894
adbc4ec4 2895 live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)
bf1317d2 2896
545cc85d 2897 if not formats:
11f9be09 2898 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 2899 self.report_drm(video_id)
11f9be09 2900 pemr = get_first(
2901 playability_statuses,
2902 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2903 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2904 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 2905 if subreason:
545cc85d 2906 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 2907 countries = get_first(microformats, 'availableCountries')
545cc85d 2908 if not countries:
2909 regions_allowed = search_meta('regionsAllowed')
2910 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2911 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 2912 reason += f'. {subreason}'
545cc85d 2913 if reason:
b7da73eb 2914 self.raise_no_formats(reason, expected=True)
bf1317d2 2915
11f9be09 2916 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 2917 if not keywords and webpage:
2918 keywords = [
2919 unescapeHTML(m.group('content'))
2920 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2921 for keyword in keywords:
2922 if keyword.startswith('yt:stretch='):
201c1459 2923 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2924 if mobj:
2925 # NB: float is intentional for forcing float division
2926 w, h = (float(v) for v in mobj.groups())
2927 if w > 0 and h > 0:
2928 ratio = w / h
2929 for f in formats:
2930 if f.get('vcodec') != 'none':
2931 f['stretched_ratio'] = ratio
2932 break
a709d873 2933 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
ff2751ac 2934 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2935 if thumbnail_url:
2936 thumbnails.append({
2937 'url': thumbnail_url,
ff2751ac 2938 })
fccf5021 2939 original_thumbnails = thumbnails.copy()
2940
0ba692ac 2941 # The best resolution thumbnails sometimes does not appear in the webpage
2942 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 2943 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 2944 thumbnail_names = [
2945 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
cca80fe6 2946 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2947 'mqdefault', 'mq1', 'mq2', 'mq3',
2948 'default', '1', '2', '3'
2949 ]
cca80fe6 2950 n_thumbnail_names = len(thumbnail_names)
0ba692ac 2951 thumbnails.extend({
2952 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2953 video_id=video_id, name=name, ext=ext,
2954 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 2955 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 2956 for thumb in thumbnails:
cca80fe6 2957 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 2958 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 2959 self._remove_duplicate_formats(thumbnails)
fccf5021 2960 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 2961
7ea65411 2962 category = get_first(microformats, 'category') or search_meta('genre')
2963 channel_id = str_or_none(
2964 get_first(video_details, 'channelId')
2965 or get_first(microformats, 'externalChannelId')
2966 or search_meta('channelId'))
2967 duration = int_or_none(
2968 get_first(video_details, 'lengthSeconds')
2969 or get_first(microformats, 'lengthSeconds')
2970 or parse_duration(search_meta('duration'))) or None
2971 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2972
2973 live_content = get_first(video_details, 'isLiveContent')
2974 is_upcoming = get_first(video_details, 'isUpcoming')
2975 if is_live is None:
2976 if is_upcoming or live_content is False:
2977 is_live = False
2978 if is_upcoming is None and (live_content or is_live):
2979 is_upcoming = False
adbc4ec4
THD
2980 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2981 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2982 if not duration and live_end_time and live_start_time:
2983 duration = live_end_time - live_start_time
2984
2985 if is_live and self.get_param('live_from_start'):
2986 self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
7ea65411 2987
720c3099 2988 formats.extend(self._extract_storyboard(player_responses, duration))
2989
2990 # Source is given priority since formats that throttle are given lower source_preference
2991 # When throttling issue is fully fixed, remove this
2992 self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
2993
545cc85d 2994 info = {
2995 'id': video_id,
39ca3b5c 2996 'title': video_title,
545cc85d 2997 'formats': formats,
2998 'thumbnails': thumbnails,
fccf5021 2999 # The best thumbnail that we are sure exists. Prevents unnecessary
3000 # URL checking if user don't care about getting the best possible thumbnail
3001 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 3002 'description': video_description,
3003 'upload_date': unified_strdate(
11f9be09 3004 get_first(microformats, 'uploadDate')
545cc85d 3005 or search_meta('uploadDate')),
11f9be09 3006 'uploader': get_first(video_details, 'author'),
545cc85d 3007 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3008 'uploader_url': owner_profile_url,
3009 'channel_id': channel_id,
11f9be09 3010 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
545cc85d 3011 'duration': duration,
3012 'view_count': int_or_none(
11f9be09 3013 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 3014 or search_meta('interactionCount')),
11f9be09 3015 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 3016 'age_limit': 18 if (
11f9be09 3017 get_first(microformats, 'isFamilySafe') is False
545cc85d 3018 or search_meta('isFamilyFriendly') == 'false'
3019 or search_meta('og:restrictions:age') == '18+') else 0,
3020 'webpage_url': webpage_url,
3021 'categories': [category] if category else None,
3022 'tags': keywords,
11f9be09 3023 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 3024 'is_live': is_live,
3025 'was_live': (False if is_live or is_upcoming or live_content is False
3026 else None if is_live is None or is_upcoming is None
3027 else live_content),
3028 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
adbc4ec4 3029 'release_timestamp': live_start_time,
545cc85d 3030 }
b477fc13 3031
3944e7af 3032 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 3033 if pctr:
ecdc9049 3034 def get_lang_code(track):
3035 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3036 or track.get('languageCode'))
3037
3038 # Converted into dicts to remove duplicates
3039 captions = {
3040 get_lang_code(sub): sub
3041 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3042 translation_languages = {
3043 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3044 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3045
774d79cc 3046 def process_language(container, base_url, lang_code, sub_name, query):
120916da 3047 lang_subs = container.setdefault(lang_code, [])
545cc85d 3048 for fmt in self._SUBTITLE_FORMATS:
3049 query.update({
3050 'fmt': fmt,
3051 })
3052 lang_subs.append({
3053 'ext': fmt,
3054 'url': update_url_query(base_url, query),
774d79cc 3055 'name': sub_name,
545cc85d 3056 })
7e72694b 3057
ecdc9049 3058 subtitles, automatic_captions = {}, {}
3059 for lang_code, caption_track in captions.items():
3060 base_url = caption_track.get('baseUrl')
545cc85d 3061 if not base_url:
3062 continue
ecdc9049 3063 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 3064 if caption_track.get('kind') != 'asr':
545cc85d 3065 if not lang_code:
3066 continue
3067 process_language(
ecdc9049 3068 subtitles, base_url, lang_code, lang_name, {})
3069 if not caption_track.get('isTranslatable'):
3070 continue
3944e7af 3071 for trans_code, trans_name in translation_languages.items():
3072 if not trans_code:
545cc85d 3073 continue
ecdc9049 3074 if caption_track.get('kind') != 'asr':
3075 trans_code += f'-{lang_code}'
3076 trans_name += format_field(lang_name, template=' from %s')
545cc85d 3077 process_language(
ecdc9049 3078 automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
3079 info['automatic_captions'] = automatic_captions
3080 info['subtitles'] = subtitles
7e72694b 3081
545cc85d 3082 parsed_url = compat_urllib_parse_urlparse(url)
3083 for component in [parsed_url.fragment, parsed_url.query]:
3084 query = compat_parse_qs(component)
3085 for k, v in query.items():
3086 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3087 d_k += '_time'
3088 if d_k not in info and k in s_ks:
3089 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
3090
3091 # Youtube Music Auto-generated description
822b9d9c 3092 if video_description:
38d70284 3093 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 3094 if mobj:
822b9d9c
RA
3095 release_year = mobj.group('release_year')
3096 release_date = mobj.group('release_date')
3097 if release_date:
3098 release_date = release_date.replace('-', '')
3099 if not release_year:
545cc85d 3100 release_year = release_date[:4]
3101 info.update({
3102 'album': mobj.group('album'.strip()),
3103 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3104 'track': mobj.group('track').strip(),
3105 'release_date': release_date,
cc2db878 3106 'release_year': int_or_none(release_year),
545cc85d 3107 })
7e72694b 3108
545cc85d 3109 initial_data = None
3110 if webpage:
3111 initial_data = self._extract_yt_initial_variable(
3112 webpage, self._YT_INITIAL_DATA_RE, video_id,
3113 'yt initial data')
3114 if not initial_data:
99e9e001 3115 query = {'videoId': video_id}
3116 query.update(self._get_checkok_params())
109dd3b2 3117 initial_data = self._extract_response(
3118 item_id=video_id, ep='next', fatal=False,
99e9e001 3119 ytcfg=master_ytcfg, query=query,
3120 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 3121 note='Downloading initial data API JSON')
545cc85d 3122
c60ee3a2 3123 try:
3124 # This will error if there is no livechat
3125 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
ecdc9049 3126 info.setdefault('subtitles', {})['live_chat'] = [{
c60ee3a2 3127 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3128 'video_id': video_id,
3129 'ext': 'json',
f6745c49 3130 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 3131 }]
3132 except (KeyError, IndexError, TypeError):
3133 pass
545cc85d 3134
3135 if initial_data:
7c365c21 3136 info['chapters'] = (
3137 self._extract_chapters_from_json(initial_data, duration)
3138 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3139 or None)
545cc85d 3140
3141 contents = try_get(
3142 initial_data,
3143 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3144 list) or []
3145 for content in contents:
3146 vpir = content.get('videoPrimaryInfoRenderer')
3147 if vpir:
3148 stl = vpir.get('superTitleLink')
3149 if stl:
fe93e2c4 3150 stl = self._get_text(stl)
545cc85d 3151 if try_get(
3152 vpir,
3153 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3154 info['location'] = stl
3155 else:
3156 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3157 if mobj:
3158 info.update({
3159 'series': mobj.group(1),
3160 'season_number': int(mobj.group(2)),
3161 'episode_number': int(mobj.group(3)),
3162 })
3163 for tlb in (try_get(
3164 vpir,
3165 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3166 list) or []):
3167 tbr = tlb.get('toggleButtonRenderer') or {}
3168 for getter, regex in [(
3169 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3170 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3171 lambda x: x['accessibility'],
3172 lambda x: x['accessibilityData']['accessibilityData'],
3173 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3174 label = (try_get(tbr, getter, dict) or {}).get('label')
3175 if label:
3176 mobj = re.match(regex, label)
3177 if mobj:
3178 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3179 break
3180 sbr_tooltip = try_get(
3181 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3182 if sbr_tooltip:
3183 like_count, dislike_count = sbr_tooltip.split(' / ')
3184 info.update({
3185 'like_count': str_to_int(like_count),
3186 'dislike_count': str_to_int(dislike_count),
3187 })
3188 vsir = content.get('videoSecondaryInfoRenderer')
3189 if vsir:
052e1350 3190 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
545cc85d 3191 rows = try_get(
3192 vsir,
3193 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3194 list) or []
3195 multiple_songs = False
3196 for row in rows:
3197 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3198 multiple_songs = True
3199 break
3200 for row in rows:
3201 mrr = row.get('metadataRowRenderer') or {}
3202 mrr_title = mrr.get('title')
3203 if not mrr_title:
3204 continue
052e1350 3205 mrr_title = self._get_text(mrr, 'title')
3206 mrr_contents_text = self._get_text(mrr, ('contents', 0))
545cc85d 3207 if mrr_title == 'License':
3208 info['license'] = mrr_contents_text
3209 elif not multiple_songs:
3210 if mrr_title == 'Album':
3211 info['album'] = mrr_contents_text
3212 elif mrr_title == 'Artist':
3213 info['artist'] = mrr_contents_text
3214 elif mrr_title == 'Song':
3215 info['track'] = mrr_contents_text
3216
3217 fallbacks = {
3218 'channel': 'uploader',
3219 'channel_id': 'uploader_id',
3220 'channel_url': 'uploader_url',
3221 }
3222 for to, frm in fallbacks.items():
3223 if not info.get(to):
3224 info[to] = info.get(frm)
3225
3226 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3227 v = info.get(s_k)
3228 if v:
3229 info[d_k] = v
b84071c0 3230
11f9be09 3231 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3232 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3233 is_membersonly = None
b28f8d24 3234 is_premium = None
c224251a
M
3235 if initial_data and is_private is not None:
3236 is_membersonly = False
b28f8d24 3237 is_premium = False
47193e02 3238 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3239 badge_labels = set()
3240 for content in contents:
3241 if not isinstance(content, dict):
3242 continue
3243 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3244 for badge_label in badge_labels:
3245 if badge_label.lower() == 'members only':
3246 is_membersonly = True
3247 elif badge_label.lower() == 'premium':
3248 is_premium = True
3249 elif badge_label.lower() == 'unlisted':
3250 is_unlisted = True
c224251a 3251
c224251a
M
3252 info['availability'] = self._availability(
3253 is_private=is_private,
b28f8d24 3254 needs_premium=is_premium,
c224251a
M
3255 needs_subscription=is_membersonly,
3256 needs_auth=info['age_limit'] >= 18,
3257 is_unlisted=None if is_private is None else is_unlisted)
3258
a2160aa4 3259 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3260
11f9be09 3261 self.mark_watched(video_id, player_responses)
d77ab8e2 3262
545cc85d 3263 return info
c5e8d7af 3264
a61fd4cf 3265
a6213a49 3266class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
5f6a1245 3267
a6213a49 3268 def _extract_channel_id(self, webpage):
3269 channel_id = self._html_search_meta(
3270 'channelId', webpage, 'channel id', default=None)
3271 if channel_id:
3272 return channel_id
3273 channel_url = self._html_search_meta(
3274 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3275 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3276 'twitter:app:url:googleplay'), webpage, 'channel url')
3277 return self._search_regex(
3278 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3279 channel_url, 'channel id')
15f6397c 3280
8bdd16b4 3281 @staticmethod
cd7c66cf 3282 def _extract_basic_item_renderer(item):
3283 # Modified from _extract_grid_item_renderer
201c1459 3284 known_basic_renderers = (
3285 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3286 )
3287 for key, renderer in item.items():
201c1459 3288 if not isinstance(renderer, dict):
cd7c66cf 3289 continue
201c1459 3290 elif key in known_basic_renderers:
3291 return renderer
3292 elif key.startswith('grid') and key.endswith('Renderer'):
3293 return renderer
8bdd16b4 3294
8bdd16b4 3295 def _grid_entries(self, grid_renderer):
3296 for item in grid_renderer['items']:
3297 if not isinstance(item, dict):
39b62db1 3298 continue
cd7c66cf 3299 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3300 if not isinstance(renderer, dict):
3301 continue
052e1350 3302 title = self._get_text(renderer, 'title')
fe93e2c4 3303
8bdd16b4 3304 # playlist
3305 playlist_id = renderer.get('playlistId')
3306 if playlist_id:
3307 yield self.url_result(
3308 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3309 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3310 video_title=title)
201c1459 3311 continue
8bdd16b4 3312 # video
3313 video_id = renderer.get('videoId')
3314 if video_id:
3315 yield self._extract_video(renderer)
201c1459 3316 continue
8bdd16b4 3317 # channel
3318 channel_id = renderer.get('channelId')
3319 if channel_id:
8bdd16b4 3320 yield self.url_result(
3321 'https://www.youtube.com/channel/%s' % channel_id,
3322 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3323 continue
3324 # generic endpoint URL support
3325 ep_url = urljoin('https://www.youtube.com/', try_get(
3326 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3327 compat_str))
3328 if ep_url:
3329 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3330 if ie.suitable(ep_url):
3331 yield self.url_result(
3332 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3333 break
8bdd16b4 3334
3d3dddc9 3335 def _shelf_entries_from_content(self, shelf_renderer):
3336 content = shelf_renderer.get('content')
3337 if not isinstance(content, dict):
8bdd16b4 3338 return
cd7c66cf 3339 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3340 if renderer:
3341 # TODO: add support for nested playlists so each shelf is processed
3342 # as separate playlist
3343 # TODO: this includes only first N items
3344 for entry in self._grid_entries(renderer):
3345 yield entry
3346 renderer = content.get('horizontalListRenderer')
3347 if renderer:
3348 # TODO
3349 pass
8bdd16b4 3350
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a link to the shelf's own page followed by its inline entries.

    @param shelf_renderer: the shelfRenderer dict
    @param skip_channels:  when true, a shelf whose URL contains
                           '/channels?' yields nothing at all
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))
    if shelf_url:
        # Links to other channels are recognised by their URL; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType
        # == WEB_PAGE_TYPE_CHANNEL will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # The shelf may carry no URL of its own, so its content is extracted too
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 3367
8bdd16b4 3368 def _playlist_entries(self, video_list_renderer):
3369 for content in video_list_renderer['contents']:
3370 if not isinstance(content, dict):
3371 continue
3372 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3373 if not isinstance(renderer, dict):
3374 continue
3375 video_id = renderer.get('videoId')
3376 if not video_id:
3377 continue
3378 yield self._extract_video(renderer)
07aeced6 3379
def _rich_entries(self, rich_grid_renderer):
    """Yield the video wrapped by a rich item, if it carries a 'videoId'."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3387
8bdd16b4 3388 def _video_entry(self, video_renderer):
3389 video_id = video_renderer.get('videoId')
3390 if video_id:
3391 return self._extract_video(video_renderer)
dacb3a86 3392
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, and every video
    link found in the post text (except a link to the attached video).
    Yields nothing when the thread holds no 'backstagePostRenderer'.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video has already been yielded above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 3428
8bdd16b4 3429 def _post_thread_continuation_entries(self, post_thread_continuation):
3430 contents = post_thread_continuation.get('contents')
3431 if not isinstance(contents, list):
3432 return
3433 for content in contents:
3434 renderer = content.get('backstagePostThreadRenderer')
3435 if not isinstance(renderer, dict):
3436 continue
3437 for entry in self._post_thread_entries(renderer):
3438 yield entry
07aeced6 3439
39ed931e 3440 r''' # unused
3441 def _rich_grid_entries(self, contents):
3442 for content in contents:
3443 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3444 if video_renderer:
3445 entry = self._video_entry(video_renderer)
3446 if entry:
3447 yield entry
3448 '''
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under a section-list / rich-grid renderer.

    @param parent_renderer:   dict whose 'contents' list is walked
    @param continuation_list: one-element list used as an out-parameter; it
                              is reset to [None] and its only slot is then
                              updated in place with the continuation
                              extracted while walking the contents
    """
    def as_grid_item(renderer_key):
        # Wrap a bare renderer so that _grid_entries can process it
        return lambda x: self._grid_entries({'items': [{renderer_key: x}]})

    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x),
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': as_grid_item('playlistRenderer'),
        'channelRenderer': as_grid_item('channelRenderer'),
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = try_get(content, lambda x: x['itemSectionRenderer'], dict)
        if not section:
            # Not an item section: only rich items are handled here
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first known renderer of each section item is processed
            for key, renderer in section_item.items():
                handler = handlers.get(key)
                if handler is None:
                    continue
                for entry in handler(renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
3492
3493 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
3494 continuation_list = [None]
3495 extract_entries = lambda x: self._extract_entries(x, continuation_list)
29f7c58a 3496 tab_content = try_get(tab, lambda x: x['content'], dict)
3497 if not tab_content:
3498 return
3462ffa8 3499 parent_renderer = (
29f7c58a 3500 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3501 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
70d5c17b 3502 for entry in extract_entries(parent_renderer):
3503 yield entry
3462ffa8 3504 continuation = continuation_list[0]
d069eca7 3505
8bdd16b4 3506 for page_num in itertools.count(1):
3507 if not continuation:
3508 break
99e9e001 3509 headers = self.generate_api_headers(
3510 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
79360d99 3511 response = self._extract_response(
3512 item_id='%s page %s' % (item_id, page_num),
fe93e2c4 3513 query=continuation, headers=headers, ytcfg=ytcfg,
79360d99 3514 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
a5c56234
M
3515
3516 if not response:
8bdd16b4 3517 break
ac56cf38 3518 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
3519 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
3520 visitor_data = self._extract_visitor_data(response) or visitor_data
ebf1b291 3521
69184e41 3522 known_continuation_renderers = {
3523 'playlistVideoListContinuation': self._playlist_entries,
3524 'gridContinuation': self._grid_entries,
3525 'itemSectionContinuation': self._post_thread_continuation_entries,
3526 'sectionListContinuation': extract_entries, # for feeds
3527 }
8bdd16b4 3528 continuation_contents = try_get(
69184e41 3529 response, lambda x: x['continuationContents'], dict) or {}
3530 continuation_renderer = None
3531 for key, value in continuation_contents.items():
3532 if key not in known_continuation_renderers:
3462ffa8 3533 continue
69184e41 3534 continuation_renderer = value
3535 continuation_list = [None]
3536 for entry in known_continuation_renderers[key](continuation_renderer):
3537 yield entry
3538 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3539 break
3540 if continuation_renderer:
3541 continue
c5e8d7af 3542
a1b535bd 3543 known_renderers = {
3544 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3545 'gridVideoRenderer': (self._grid_entries, 'items'),
d61fc646 3546 'gridChannelRenderer': (self._grid_entries, 'items'),
a1b535bd 3547 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
cd7c66cf 3548 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
9ba5705a 3549 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
26fe8ffe 3550 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
a1b535bd 3551 }
cce889b9 3552 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
8bdd16b4 3553 continuation_items = try_get(
cce889b9 3554 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
a1b535bd 3555 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3556 video_items_renderer = None
3557 for key, value in continuation_item.items():
3558 if key not in known_renderers:
8bdd16b4 3559 continue
a1b535bd 3560 video_items_renderer = {known_renderers[key][1]: continuation_items}
9ba5705a 3561 continuation_list = [None]
a1b535bd 3562 for entry in known_renderers[key][0](video_items_renderer):
3563 yield entry
9ba5705a 3564 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
a1b535bd 3565 break
3566 if video_items_renderer:
3567 continue
8bdd16b4 3568 break
9558dcec 3569
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab flagged as selected.

    Raises ExtractorError when no tab has 'selected' set to True.
    """
    renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next((r for r in renderers if r.get('selected') is True), None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected
b82f815f 3578
@classmethod
def _extract_uploader(cls, data):
    """Return the playlist owner's uploader fields found in the sidebar.

    The result contains 'uploader', 'uploader_id' and 'uploader_url', minus
    any field whose value is None; it is empty when no owner is found.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}
    fields = (
        ('uploader', owner.get('text')),
        ('uploader_id', try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)),
        ('uploader_url', urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))),
    )
    return {name: value for name, value in fields if value is not None}
8bdd16b4 3593
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of a tabbed page.

    Metadata is taken from the channel metadata renderer when present,
    otherwise from the playlist metadata renderer; uploader details fall
    back to the sidebar when no channel ID is known.

    @param item_id: ID used for the playlist when the metadata gives none
    @param ytcfg:   ytcfg passed on to the API requests
    @param data:    initial data of the page
    @param tabs:    list of tab wrappers, one of which must be selected
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    thumbnails = (
        self._extract_thumbnails(renderer, 'avatar')
        or self._extract_thumbnails(
            self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
            ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail')))

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    # Append the tab name(s) to the title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly just updated) uploader fields
    for channel_key, uploader_key in (
            ('channel', 'uploader'),
            ('channel_id', 'uploader_id'),
            ('channel_url', 'uploader_url')):
        metadata[channel_key] = metadata[uploader_key]

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3654
def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of a Mix playlist, requesting further pages as needed.

    Each page is expected to overlap the previous one, so only the videos
    after the last one already yielded are emitted. Extraction stops when a
    page is empty or missing, brings nothing new, or when the very first
    video shows up again.

    @param playlist:    playlist dict of the first page
    @param playlist_id: ID of the Mix playlist
    @param data:        initial data of the page
    @param ytcfg:       ytcfg passed on to the API requests
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        # A response without the expected playlist ends the Mix quietly
        # instead of failing while subscripting None
        if not isinstance(playlist, dict) or not playlist.get('contents'):
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item may lack a watch endpoint; the query below then
        # relies on its fallback values
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 3689
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return the result for a playlist embedded in a watch page.

    A playlist whose own URL differs from `url` is delegated to the
    tab-based extractor through a URL result; otherwise it is extracted
    here as a Mix playlist.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    playlist_url = urljoin(url, try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3707
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for entry in dropdown_entries:
        selected = try_get(
            entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if selected:
            # Only the first selected entry is taken into account
            label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
            if label:
                badge_labels.add(label.lower())
            break

    is_private = is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'public':
            is_unlisted = is_private = False
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
3739
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first non-empty `info_renderer` of the playlist sidebar.

    @param data:          response holding the 'sidebar' structure
    @param info_renderer: key of the wanted renderer within a sidebar item
    @param expected_type: type the renderer must have to be accepted
    Returns None when no sidebar item provides such a renderer.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)
3748
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when the page has no primary sidebar renderer. If the
    renderer exists but the button is not found, the request is still made
    with default 'params' and 'browseId' values.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 3784
def _extract_webpage(self, url, item_id, fatal=True):
    """Download a page and its initial data, retrying on incomplete results.

    The download is retried, up to the 'extractor_retries' parameter, after
    network errors other than HTTP 403/429 and whenever the initial data
    lacks both 'contents' and 'currentVideoEndpoint'.

    @param fatal: when true, errors are raised; otherwise they are reported
                  as warnings and whatever was obtained is returned
    @returns      (webpage, data) tuple
    """
    max_retries = self.get_param('extractor_retries', 3)
    webpage = data = pending_error = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if pending_error:
            self.report_warning('%s. Retrying ...' % pending_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (retry_suffix,))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # Network failures are worth retrying, except for HTTP 403/429
            retriable = isinstance(cause, network_exceptions) and (
                not isinstance(cause, compat_HTTPError) or cause.code not in (403, 429))
            if retriable:
                pending_error = error_to_compat_str(cause or e.msg)
                if attempt < max_retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint')):
            break

        pending_error = 'Incomplete yt initial data received'
        if attempt >= max_retries:
            if fatal:
                raise ExtractorError(pending_error)
            self.report_warning(pending_error)
            break

    return webpage, data
3830
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the initial data for `url`, preferring the webpage over the API.

    Unless 'webpage' is listed in the 'skip' extractor argument, the webpage is
    downloaded (errors are fatal only if `webpage_fatal`) and `ytcfg` is taken
    from it when the caller did not supply one. If that yields no data, the tab
    endpoint API is queried instead.

    Before falling back to the API while authenticated and without any ytcfg,
    an expected ExtractorError is raised if `fatal` is set and 'authcheck' is
    not in the 'skip' extractor argument; otherwise a one-time warning is shown.

    Returns a (data, ytcfg) tuple.
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
    if data:
        return data, ytcfg

    if not ytcfg and self.is_authenticated:
        auth_msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        if 'authcheck' not in self._configuration_arg('skip') and fatal:
            raise ExtractorError(
                auth_msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                           ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(auth_msg, only_once=True)

    api_data = self._extract_tab_endpoint(
        url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return api_data, ytcfg
3847
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Fetch the data for `url` purely through the API.

    The URL is first resolved with the 'navigation/resolve_url' endpoint. If the
    response carries a 'browseEndpoint' or a 'watchEndpoint' (checked in that
    order), its parameters are sent to the 'browse' or 'next' endpoint
    respectively and that response is returned.

    If neither endpoint is present, an expected ExtractorError is raised when
    `fatal`; otherwise a warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    # Arguments common to the resolve request and the follow-up request
    shared_kwargs = {
        'item_id': item_id,
        'headers': headers,
        'ytcfg': ytcfg,
        'fatal': fatal,
        'default_client': default_client,
    }
    resolved = self._extract_response(
        query={'url': url}, check_get_keys='endpoint', ep='navigation/resolve_url',
        note='Downloading API parameters API JSON', **shared_kwargs)

    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            query=endpoint_params, ep=api_ep,
            check_get_keys=('contents', 'currentVideoEndpoint'), **shared_kwargs)

    failure_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(failure_note, expected=True)
    self.report_warning(failure_note, item_id)
3865
@staticmethod
def _smuggle_data(entries, data):
    """
    Lazily yield each entry of `entries`, smuggling `data` into its 'url'.

    Entries are modified in place. When `data` is falsy they are passed
    through untouched.
    """
    for item in entries:
        if data:
            item.update(url=smuggle_url(item['url'], data))
        yield item
3872
3873 _SEARCH_PARAMS = None
3874
def _search_results(self, query, params=NO_DEFAULT):
    """
    Yield the entries found by searching for `query`, page by page.

    `params` defaults to the class' `_SEARCH_PARAMS`; a falsy value means no
    'params' key is sent. Pages are requested from the 'search' endpoint until
    no continuation is left after a page has been processed.
    """
    search_params = self._SEARCH_PARAMS if params is NO_DEFAULT else params
    request = {'query': query}
    if search_params:
        request['params'] = search_params

    # The first page and the continuation pages keep their results in different places
    content_getters = (
        lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
        lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems'],
    )

    # One-element list handed to _extract_entries; its first item is read back
    # afterwards as the continuation for the next page
    continuation_holder = [None]
    page_num = 0
    while True:
        page_num += 1
        request.update(continuation_holder[0] or {})
        response = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
            check_get_keys=('contents', 'onResponseReceivedCommands'))
        page_contents = try_get(response, content_getters, list)
        yield from self._extract_entries({'contents': page_contents}, continuation_holder)
        if not continuation_holder[0]:
            return
3895
3896
3897class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
3898 IE_DESC = 'YouTube Tabs'
3899 _VALID_URL = r'''(?x:
3900 https?://
3901 (?:\w+\.)?
3902 (?:
3903 youtube(?:kids)?\.com|
3904 %(invidious)s
3905 )/
3906 (?:
3907 (?P<channel_type>channel|c|user|browse)/|
3908 (?P<not_channel>
3909 feed/|hashtag/|
3910 (?:playlist|watch)\?.*?\blist=
3911 )|
3912 (?!(?:%(reserved_names)s)\b) # Direct URLs
3913 )
3914 (?P<id>[^/?\#&]+)
3915 )''' % {
3916 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3917 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3918 }
3919 IE_NAME = 'youtube:tab'
3920
3921 _TESTS = [{
3922 'note': 'playlists, multipage',
3923 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3924 'playlist_mincount': 94,
3925 'info_dict': {
3926 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3927 'title': 'Игорь Клейнер - Playlists',
3928 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3929 'uploader': 'Игорь Клейнер',
3930 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3931 },
3932 }, {
3933 'note': 'playlists, multipage, different order',
3934 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3935 'playlist_mincount': 94,
3936 'info_dict': {
3937 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3938 'title': 'Игорь Клейнер - Playlists',
3939 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3940 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3941 'uploader': 'Игорь Клейнер',
3942 },
3943 }, {
3944 'note': 'playlists, series',
3945 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3946 'playlist_mincount': 5,
3947 'info_dict': {
3948 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3949 'title': '3Blue1Brown - Playlists',
3950 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3951 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3952 'uploader': '3Blue1Brown',
3953 },
3954 }, {
3955 'note': 'playlists, singlepage',
3956 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3957 'playlist_mincount': 4,
3958 'info_dict': {
3959 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3960 'title': 'ThirstForScience - Playlists',
3961 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3962 'uploader': 'ThirstForScience',
3963 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3964 }
3965 }, {
3966 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3967 'only_matching': True,
3968 }, {
3969 'note': 'basic, single video playlist',
3970 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3971 'info_dict': {
3972 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3973 'uploader': 'Sergey M.',
3974 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3975 'title': 'youtube-dl public playlist',
3976 },
3977 'playlist_count': 1,
3978 }, {
3979 'note': 'empty playlist',
3980 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3981 'info_dict': {
3982 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3983 'uploader': 'Sergey M.',
3984 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3985 'title': 'youtube-dl empty playlist',
3986 },
3987 'playlist_count': 0,
3988 }, {
3989 'note': 'Home tab',
3990 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3991 'info_dict': {
3992 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3993 'title': 'lex will - Home',
3994 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3995 'uploader': 'lex will',
3996 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3997 },
3998 'playlist_mincount': 2,
3999 }, {
4000 'note': 'Videos tab',
4001 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4002 'info_dict': {
4003 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4004 'title': 'lex will - Videos',
4005 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4006 'uploader': 'lex will',
4007 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4008 },
4009 'playlist_mincount': 975,
4010 }, {
4011 'note': 'Videos tab, sorted by popular',
4012 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4013 'info_dict': {
4014 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4015 'title': 'lex will - Videos',
4016 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4017 'uploader': 'lex will',
4018 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4019 },
4020 'playlist_mincount': 199,
4021 }, {
4022 'note': 'Playlists tab',
4023 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4024 'info_dict': {
4025 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4026 'title': 'lex will - Playlists',
4027 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4028 'uploader': 'lex will',
4029 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4030 },
4031 'playlist_mincount': 17,
4032 }, {
4033 'note': 'Community tab',
4034 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4035 'info_dict': {
4036 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4037 'title': 'lex will - Community',
4038 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4039 'uploader': 'lex will',
4040 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4041 },
4042 'playlist_mincount': 18,
4043 }, {
4044 'note': 'Channels tab',
4045 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4046 'info_dict': {
4047 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4048 'title': 'lex will - Channels',
4049 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4050 'uploader': 'lex will',
4051 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4052 },
4053 'playlist_mincount': 12,
4054 }, {
4055 'note': 'Search tab',
4056 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4057 'playlist_mincount': 40,
4058 'info_dict': {
4059 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4060 'title': '3Blue1Brown - Search - linear algebra',
4061 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4062 'uploader': '3Blue1Brown',
4063 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4064 },
4065 }, {
4066 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4067 'only_matching': True,
4068 }, {
4069 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4070 'only_matching': True,
4071 }, {
4072 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4073 'only_matching': True,
4074 }, {
4075 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4076 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4077 'info_dict': {
4078 'title': '29C3: Not my department',
4079 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4080 'uploader': 'Christiaan008',
4081 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4082 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
4083 },
4084 'playlist_count': 96,
4085 }, {
4086 'note': 'Large playlist',
4087 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4088 'info_dict': {
4089 'title': 'Uploads from Cauchemar',
4090 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4091 'uploader': 'Cauchemar',
4092 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
4093 },
4094 'playlist_mincount': 1123,
4095 }, {
4096 'note': 'even larger playlist, 8832 videos',
4097 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4098 'only_matching': True,
4099 }, {
4100 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4101 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4102 'info_dict': {
4103 'title': 'Uploads from Interstellar Movie',
4104 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4105 'uploader': 'Interstellar Movie',
4106 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4107 },
4108 'playlist_mincount': 21,
4109 }, {
4110 'note': 'Playlist with "show unavailable videos" button',
4111 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
4112 'info_dict': {
4113 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
4114 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
4115 'uploader': 'Phim Siêu Nhân Nhật Bản',
4116 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
4117 },
4118 'playlist_mincount': 200,
4119 }, {
4120 'note': 'Playlist with unavailable videos in page 7',
4121 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
4122 'info_dict': {
4123 'title': 'Uploads from BlankTV',
4124 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
4125 'uploader': 'BlankTV',
4126 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
4127 },
4128 'playlist_mincount': 1000,
4129 }, {
4130 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
4131 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4132 'info_dict': {
4133 'title': 'Data Analysis with Dr Mike Pound',
4134 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4135 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4136 'uploader': 'Computerphile',
4137 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
4138 },
4139 'playlist_mincount': 11,
4140 }, {
4141 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4142 'only_matching': True,
4143 }, {
4144 'note': 'Playlist URL that does not actually serve a playlist',
4145 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
4146 'info_dict': {
4147 'id': 'FqZTN594JQw',
4148 'ext': 'webm',
4149 'title': "Smiley's People 01 detective, Adventure Series, Action",
4150 'uploader': 'STREEM',
4151 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
4152 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
4153 'upload_date': '20150526',
4154 'license': 'Standard YouTube License',
4155 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
4156 'categories': ['People & Blogs'],
4157 'tags': list,
4158 'view_count': int,
4159 'like_count': int,
4160 'dislike_count': int,
4161 },
4162 'params': {
4163 'skip_download': True,
4164 },
4165 'skip': 'This video is not available.',
4166 'add_ie': [YoutubeIE.ie_key()],
4167 }, {
4168 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4169 'only_matching': True,
4170 }, {
4171 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4172 'only_matching': True,
4173 }, {
4174 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4175 'info_dict': {
4176 'id': '3yImotZU3tw', # This will keep changing
4177 'ext': 'mp4',
4178 'title': compat_str,
4179 'uploader': 'Sky News',
4180 'uploader_id': 'skynews',
4181 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4182 'upload_date': r're:\d{8}',
4183 'description': compat_str,
4184 'categories': ['News & Politics'],
4185 'tags': list,
4186 'like_count': int,
4187 'dislike_count': int,
4188 },
4189 'params': {
4190 'skip_download': True,
4191 },
4192 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
4193 }, {
4194 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
4195 'info_dict': {
4196 'id': 'a48o2S1cPoo',
4197 'ext': 'mp4',
4198 'title': 'The Young Turks - Live Main Show',
4199 'uploader': 'The Young Turks',
4200 'uploader_id': 'TheYoungTurks',
4201 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
4202 'upload_date': '20150715',
4203 'license': 'Standard YouTube License',
4204 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
4205 'categories': ['News & Politics'],
4206 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
4207 'like_count': int,
4208 'dislike_count': int,
4209 },
4210 'params': {
4211 'skip_download': True,
4212 },
4213 'only_matching': True,
4214 }, {
4215 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
4216 'only_matching': True,
4217 }, {
4218 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
4219 'only_matching': True,
4220 }, {
4221 'note': 'A channel that is not live. Should raise error',
4222 'url': 'https://www.youtube.com/user/numberphile/live',
4223 'only_matching': True,
4224 }, {
4225 'url': 'https://www.youtube.com/feed/trending',
4226 'only_matching': True,
4227 }, {
4228 'url': 'https://www.youtube.com/feed/library',
4229 'only_matching': True,
4230 }, {
4231 'url': 'https://www.youtube.com/feed/history',
4232 'only_matching': True,
4233 }, {
4234 'url': 'https://www.youtube.com/feed/subscriptions',
4235 'only_matching': True,
4236 }, {
4237 'url': 'https://www.youtube.com/feed/watch_later',
4238 'only_matching': True,
4239 }, {
4240 'note': 'Recommended - redirects to home page.',
4241 'url': 'https://www.youtube.com/feed/recommended',
4242 'only_matching': True,
4243 }, {
4244 'note': 'inline playlist with not always working continuations',
4245 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
4246 'only_matching': True,
4247 }, {
4248 'url': 'https://www.youtube.com/course',
4249 'only_matching': True,
4250 }, {
4251 'url': 'https://www.youtube.com/zsecurity',
4252 'only_matching': True,
4253 }, {
4254 'url': 'http://www.youtube.com/NASAgovVideo/videos',
4255 'only_matching': True,
4256 }, {
4257 'url': 'https://www.youtube.com/TheYoungTurks/live',
4258 'only_matching': True,
4259 }, {
4260 'url': 'https://www.youtube.com/hashtag/cctv9',
4261 'info_dict': {
4262 'id': 'cctv9',
4263 'title': '#cctv9',
4264 },
4265 'playlist_mincount': 350,
4266 }, {
4267 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
4268 'only_matching': True,
4269 }, {
4270 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
4271 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4272 'only_matching': True
4273 }, {
4274 'note': '/browse/ should redirect to /channel/',
4275 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
4276 'only_matching': True
4277 }, {
4278 'note': 'VLPL, should redirect to playlist?list=PL...',
4279 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4280 'info_dict': {
4281 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4282 'uploader': 'NoCopyrightSounds',
4283 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
4284 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
4285 'title': 'NCS Releases',
4286 },
4287 'playlist_mincount': 166,
4288 }, {
4289 'note': 'Topic, should redirect to playlist?list=UU...',
4290 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4291 'info_dict': {
4292 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4293 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4294 'title': 'Uploads from Royalty Free Music - Topic',
4295 'uploader': 'Royalty Free Music - Topic',
4296 },
4297 'expected_warnings': [
4298 'A channel/user page was given',
4299 'The URL does not have a videos tab',
4300 ],
4301 'playlist_mincount': 101,
4302 }, {
4303 'note': 'Topic without a UU playlist',
4304 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
4305 'info_dict': {
4306 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
4307 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
4308 },
4309 'expected_warnings': [
4310 'A channel/user page was given',
4311 'The URL does not have a videos tab',
4312 'Falling back to channel URL',
4313 ],
4314 'playlist_mincount': 9,
4315 }, {
4316 'note': 'Youtube music Album',
4317 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
4318 'info_dict': {
4319 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
4320 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
4321 },
4322 'playlist_count': 50,
4323 }, {
4324 'note': 'unlisted single video playlist',
4325 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4326 'info_dict': {
4327 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
4328 'uploader': 'colethedj',
4329 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4330 'title': 'yt-dlp unlisted playlist test',
4331 'availability': 'unlisted'
4332 },
4333 'playlist_count': 1,
4334 }, {
4335 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
4336 'url': 'https://www.youtube.com/feed/recommended',
4337 'info_dict': {
4338 'id': 'recommended',
4339 'title': 'recommended',
4340 },
4341 'playlist_mincount': 50,
4342 'params': {
4343 'skip_download': True,
4344 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4345 },
4346 }, {
4347 'note': 'API Fallback: /videos tab, sorted by oldest first',
4348 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
4349 'info_dict': {
4350 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4351 'title': 'Cody\'sLab - Videos',
4352 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
4353 'uploader': 'Cody\'sLab',
4354 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4355 },
4356 'playlist_mincount': 650,
4357 'params': {
4358 'skip_download': True,
4359 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4360 },
4361 }, {
4362 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
4363 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4364 'info_dict': {
4365 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4366 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4367 'title': 'Uploads from Royalty Free Music - Topic',
4368 'uploader': 'Royalty Free Music - Topic',
4369 },
4370 'expected_warnings': [
4371 'A channel/user page was given',
4372 'The URL does not have a videos tab',
4373 ],
4374 'playlist_mincount': 101,
4375 'params': {
4376 'skip_download': True,
4377 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4378 },
4379 }]
4380
@classmethod
def suitable(cls, url):
    """Reject every URL that YoutubeIE accepts; otherwise defer to the parent class."""
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
9297939e 4385
def _real_extract(self, url):
    """
    Extract `url` and pass the smuggled data on to the resulting entries.

    Any data smuggled into `url` is unpacked first, and 'is_music_url' is added
    to it for music URLs. If the result has (truthy) 'entries', they are wrapped
    so that the smuggled data is smuggled into every entry's URL.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True

    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4394
37e57a9f 4395 _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$')
fe03a6cd 4396
def __real_extract(self, url, smuggled_data):
    """
    Resolve a channel tab, playlist, feed or watch URL to an extraction result.

    The steps, in order:
      1. Normalise the URL (host forced to www.youtube.com, tab name lowercased,
         YouTube Music VL/MP/browse ids rewritten, bare channel URLs redirected
         to '/videos' unless the 'no-youtube-channel-redirect' compat option is set).
      2. Decide between video and playlist when the URL carries both ids
         (honouring the 'noplaylist' param).
      3. Download the data and, for channels without a videos tab whose id
         starts with 'UC', try the equivalent 'UU' playlist instead.
      4. Optionally reload the playlist with unavailable videos, then dispatch on
         the shape of the data: tabs, watch-page playlist, or a single video.

    Raises ExtractorError if the page cannot be recognised, if an album cannot be
    resolved to a playlist, or if a '/live' tab did not redirect to a video.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    redirect_allowed = 'no-youtube-channel-redirect' not in compat_opts
    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs')

    def get_mobj(url):
        groups = self._URL_RE.match(url).groupdict()
        # Groups that did not participate in the match become empty strings
        return {name: '' if value is None else value for name, value in groups.items()}

    mobj = get_mobj(url)
    redirect_warning = None
    pre, post = mobj['pre'], mobj['post']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':  # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab, post, is_channel = '', '', False
        elif id_prefix == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            album_data = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            album_url = traverse_obj(
                album_data, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not album_url:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(album_url, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    if is_channel and not tab and redirect_allowed:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = f'{pre}{tab}{post}'
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if redirect_allowed:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            requested_tab = mobj['tab'][1:]
            if mobj['tab'] == '/videos' and tab_name.lower() != requested_tab:
                redirect_warning = f'The URL does not have a {requested_tab} tab'
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, tab_name = pl_id, pl_url, requested_tab
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if tab_name.lower() != requested_tab:
                    redirect_warning += f'. {tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.report_warning(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # The data may have been replaced above, so the tabs are looked up again
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return self.url_result(
        f'https://www.youtube.com/watch?v={video_id}',
        ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 4508
c5e8d7af 4509
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids and any youtube/invidious URL with a `list=`
    # query parameter, and hands the playlist over to YoutubeTabIE.
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a video id (`v=`)."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): function-scope import kept exactly as in the original;
        # presumably this method must not rely on module-level names - confirm
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return not has_video_id and super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """
        Rewrite the URL to a canonical playlist URL and delegate to YoutubeTabIE.

        The original query string is carried over; when there is none, only
        `list=<playlist id>` is used. For music URLs 'is_music_url' is smuggled
        into the new URL.
        """
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4595
4596
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list`` parameter."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link as a full watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # The result is keyed by the playlist id, not by the video id
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4635
4636
class YoutubeYtUserIE(InfoExtractor):
    """Map the ``ytuser:<name>`` shorthand to that user's videos page."""
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the ``/user/<name>/videos`` URL and delegate to YoutubeTabIE."""
        username = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % username
        return self.url_result(
            videos_url, ie=YoutubeTabIE.ie_key(), video_id=username)
9558dcec 4650
b05654f0 4651
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` keyword (and its spelling variants) to the ``LL`` playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=LL`` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4669
4670
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` key."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = []
b05654f0 4677
a61fd4cf 4678
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` key."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
75dff0ee 4684
c9ae7b95 4685
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract results from ``youtube.com/results`` search URLs."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search named in the URL; the query doubles as playlist id and title."""
        params = parse_qs(url)
        # ``search_query`` takes precedence over ``q`` when both are present
        search_terms = params.get('search_query') or params.get('q')
        query = search_terms[0]
        # Optional ``sp`` parameter; None when the URL does not carry one
        sp = params.get('sp', (None,))[0]
        return self.playlist_result(self._search_results(query, sp), query, query)
3462ffa8 4714
4715
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    Subclasses have to provide _FEED_NAME; it is used for both IE_NAME and
    the /feed/ URL that extraction is delegated to.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name, derived from the subclass' feed name."""
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``/feed/<name>`` URL."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4732
4733
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` keyword to the ``WL`` playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=WL`` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4746
4747
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``; also matches the bare youtube.com front page."""
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Unlike the base feed class, this feed does not set the login requirement
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4763
1ed5b5c9 4764
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions``, selected with the ``:ytsubs`` keyword."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4776
1ed5b5c9 4777
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history``, selected with the ``:ythis`` / ``:ythistory`` keyword."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4786
4787
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that end before any video id.

    The pattern only matches URLs that stop right after a single known
    non-video parameter (or after ``watch?`` itself). Extraction never
    succeeds: it always raises an expected error hinting that the URL was
    probably left unquoted on the command line.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause."""
        # The hint names this project's own executable (yt-dlp), so the
        # suggested command is one the user can actually run.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4835
4836
class YoutubeClipIE(InfoExtractor):
    """Match ``youtube.com/clip/`` URLs, warn, and pass them to the Generic extractor."""
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit the unsupported-clip warning, then delegate the same URL to 'Generic'."""
        self.report_warning(
            'YouTube clips are not currently supported. The entire video will be downloaded instead')
        return self.url_result(url, ie='Generic')
4845
4846
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is only 1 to 10 characters long."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)